{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 系统包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<script>jQuery(function() {if (jQuery(\"body.notebook_app\").length == 0) { jQuery(\".input_area\").toggle(); jQuery(\".prompt\").toggle();}});</script>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# --- Standard library ---\n",
    "import os\n",
    "import re\n",
    "import pickle\n",
    "import warnings\n",
    "\n",
    "# --- Third-party ---\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import jieba\n",
    "# from wordcloud import WordCloud, STOPWORDS\n",
    "from gensim.models.word2vec import Word2Vec\n",
    "import IPython.core.display as di\n",
    "\n",
    "# `tqdm.tqdm_notebook` is a deprecated alias (scheduled for removal in tqdm 5);\n",
    "# prefer `tqdm.notebook.tqdm` and fall back only on old tqdm releases that\n",
    "# do not ship the `tqdm.notebook` submodule.\n",
    "try:\n",
    "    from tqdm.notebook import tqdm\n",
    "except ImportError:\n",
    "    from tqdm import tqdm_notebook as tqdm\n",
    "\n",
    "# Inject a script that toggles the `.input_area` and `.prompt` elements when the\n",
    "# page body does not carry the `notebook_app` class (presumably static/exported\n",
    "# HTML views, so that code inputs are hidden there -- confirm against the export used).\n",
    "di.display_html('<script>jQuery(function() {if (jQuery(\"body.notebook_app\").length == 0) { jQuery(\".input_area\").toggle(); jQuery(\".prompt\").toggle();}});</script>', raw=True)\n",
    "\n",
    "# NOTE: this silences every warning category for the rest of the session,\n",
    "# including deprecation warnings from the libraries imported above.\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# pandas display limits; full option keys are used because abbreviated keys\n",
    "# can become ambiguous when pandas adds new options.\n",
    "pd.set_option('display.max_seq_items', 300)\n",
    "pd.set_option('display.max_rows', 100)\n",
    "pd.set_option('display.max_colwidth', 20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# $$一、数据查看与初步处理$$\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: {'影响': 1,\n",
       "  '高': 10,\n",
       "  '低': 4,\n",
       "  '有钱任性': 1,\n",
       "  '便宜': 14,\n",
       "  '死硬': 1,\n",
       "  '一般': 2,\n",
       "  '贵': 12,\n",
       "  '优惠': 29,\n",
       "  '不会后悔': 1,\n",
       "  '优惠太小': 1,\n",
       "  '才优惠': 1,\n",
       "  '才': 1,\n",
       "  '呵呵': 2,\n",
       "  '只优惠': 2,\n",
       "  '值': 3,\n",
       "  '套路': 1,\n",
       "  '好': 2,\n",
       "  '都不错': 1,\n",
       "  '喜欢': 1,\n",
       "  '烧机油': 1,\n",
       "  '好看': 3,\n",
       "  '坐进去最放松的': 1,\n",
       "  '不值': 1,\n",
       "  '性价比高': 3,\n",
       "  '不错': 1,\n",
       "  '优惠小': 1,\n",
       "  '可以': 1,\n",
       "  '愉快': 1,\n",
       "  '高昂': 1,\n",
       "  '合理': 1,\n",
       "  '下不来': 1,\n",
       "  '不会便宜': 2,\n",
       "  '不高': 3,\n",
       "  '不完善': 1,\n",
       "  '放弃': 1,\n",
       "  '差不多': 7,\n",
       "  '价格不够': 1,\n",
       "  '不平易近人': 1,\n",
       "  '价格老贵了': 1,\n",
       "  '犹如宝马一般': 1,\n",
       "  '不能比的': 1,\n",
       "  '没有保证': 1,\n",
       "  '高了好多': 1,\n",
       "  '价格贵': 2,\n",
       "  '差距不大': 1,\n",
       "  '给个更便宜的价格': 1,\n",
       "  '贵了': 1,\n",
       "  '价格差不多': 1,\n",
       "  '价格高': 2,\n",
       "  '价格一般': 1,\n",
       "  '价格不知道': 1,\n",
       "  '价格不错': 2,\n",
       "  '价格不合适': 1,\n",
       "  '比较低的': 1,\n",
       "  '合理就行': 1,\n",
       "  '不纠结': 1,\n",
       "  '行情价': 1,\n",
       "  '正常价格': 1,\n",
       "  '也就多几百': 1,\n",
       "  '参差不齐': 1,\n",
       "  '贵太多': 1,\n",
       "  '不便宜': 4,\n",
       "  '价格还行': 1,\n",
       "  '还行': 1,\n",
       "  '小贵': 2,\n",
       "  '没啥优惠': 2,\n",
       "  '合适就买': 1,\n",
       "  '挺高': 1,\n",
       "  '价差很大': 1,\n",
       "  '不菲': 2,\n",
       "  '低也不能要': 1,\n",
       "  '不要在意价格': 1,\n",
       "  '硬伤': 1,\n",
       "  '下降': 1,\n",
       "  '贵一点': 2,\n",
       "  '便宜些': 2,\n",
       "  '不贵': 5,\n",
       "  ' ': 1,\n",
       "  '厚道': 1,\n",
       "  '增加': 1,\n",
       "  '奇高': 1,\n",
       "  '噪音大': 1,\n",
       "  '不好': 1,\n",
       "  '比较合理': 1,\n",
       "  '最适合': 1,\n",
       "  '价格太贵': 1,\n",
       "  '价格较高': 1,\n",
       "  '便宜很多': 1,\n",
       "  '这个价格也超级优惠': 1,\n",
       "  '外观奇丑无比': 1,\n",
       "  '外观太丑': 1,\n",
       "  '最好': 1,\n",
       "  '操控满意': 1,\n",
       "  '很不错': 1,\n",
       "  '哪个贵': 1,\n",
       "  '好点': 1,\n",
       "  '没有优势': 1,\n",
       "  '可以的': 1,\n",
       "  '也贵': 1,\n",
       "  '闻名': 1,\n",
       "  '虚高': 1,\n",
       "  '高了': 1,\n",
       "  '相当不错': 1,\n",
       "  '理想': 1,\n",
       "  '油耗低': 1,\n",
       "  '性价比不算很高': 1,\n",
       "  '轴距和空间严重不符': 1,\n",
       "  '再贵的车也没有欧蓝德好': 1,\n",
       "  '价格好': 1,\n",
       "  '好用': 1,\n",
       "  '四驱无敌': 1,\n",
       "  '真心便宜，秒杀对手   ': 1,\n",
       "  '机舱里走管子不好走': 1,\n",
       "  '降价还是不要想了': 1,\n",
       "  '优惠力度还是可以的 ': 1,\n",
       "  '方向盘比较轻': 1,\n",
       "  '较大后备箱': 1,\n",
       "  '风噪是大了点': 1,\n",
       "  '比森林人强不少': 1,\n",
       "  '隔音效果一般': 1,\n",
       "  '很良心': 1,\n",
       "  '找不出第二辆': 1,\n",
       "  '不用考虑了': 1,\n",
       "  '不靠谱': 1,\n",
       "  '没法比': 1,\n",
       "  '咬咬牙的': 1,\n",
       "  '还不如': 1,\n",
       "  '不合算': 1,\n",
       "  '给力': 1,\n",
       "  '少': 1,\n",
       "  '忽悠': 1,\n",
       "  '性价比': 1,\n",
       "  '差不太多': 1,\n",
       "  '不够': 1,\n",
       "  '烦心': 1,\n",
       "  '丑': 1,\n",
       "  '适中': 1,\n",
       "  '贵吧': 1,\n",
       "  '空间足够': 1,\n",
       "  '也不高': 1,\n",
       "  '价格又黑': 1,\n",
       "  '操控不错': 1,\n",
       "  '略贵': 1,\n",
       "  '太贵': 1,\n",
       "  '更高': 1,\n",
       "  '更恶心': 1,\n",
       "  '菜车': 1,\n",
       "  '好车': 1,\n",
       "  '再靠谱点': 1,\n",
       "  '没有性价比': 1,\n",
       "  '无敌': 1,\n",
       "  '舒适度也很高': 1,\n",
       "  '有一些优惠，但不会价格大跳水': 1,\n",
       "  '被套路了': 1,\n",
       "  '不错，无噪音': 1,\n",
       "  '纠结': 1},\n",
       " 1: {'普通': 1,\n",
       "  '还好': 1,\n",
       "  '松垮': 1,\n",
       "  '旧': 1,\n",
       "  '差': 8,\n",
       "  '不改改': 1,\n",
       "  '一点点': 1,\n",
       "  '不细致': 1,\n",
       "  '不错': 5,\n",
       "  '破破烂烂的': 1,\n",
       "  '烧机油': 5,\n",
       "  '坐进去最放松的': 1,\n",
       "  '影响驾驶': 1,\n",
       "  '好': 4,\n",
       "  '不感冒': 1,\n",
       "  '无法比': 1,\n",
       "  '面包车': 1,\n",
       "  '全塑料': 1,\n",
       "  '喜欢': 7,\n",
       "  '打95分': 1,\n",
       "  '不咋样': 1,\n",
       "  '认可度高': 1,\n",
       "  '皮实耐用': 1,\n",
       "  '提升太多': 1,\n",
       "  '好看': 2,\n",
       "  '电动尾门太慢': 1,\n",
       "  '挺炫': 1,\n",
       "  '中庸': 1,\n",
       "  '改了也不够': 1,\n",
       "  '完败': 2,\n",
       "  '要求一致': 1,\n",
       "  '简单不豪华': 1,\n",
       "  '才叫车': 1,\n",
       "  '太水': 1,\n",
       "  '手感爆炸': 1,\n",
       "  '不逊于': 1,\n",
       "  '时尚': 1,\n",
       "  '大改': 2,\n",
       "  '强': 4,\n",
       "  '没的说': 1,\n",
       "  '贵': 1,\n",
       "  '省油': 1,\n",
       "  '大气': 1,\n",
       "  '可以': 1,\n",
       "  '出毛病': 1,\n",
       "  '用处不大': 1,\n",
       "  '脱皮': 1,\n",
       "  '记忆': 1,\n",
       "  '绝对值': 1,\n",
       "  '豪华感强': 1,\n",
       "  '中上游': 1,\n",
       "  '粗糙': 2,\n",
       "  '好一些': 1,\n",
       "  '内饰豪华': 1,\n",
       "  '内饰绝对豪华': 1,\n",
       "  '内饰不咋地': 1,\n",
       "  '对口味': 1,\n",
       "  '可以保持': 1,\n",
       "  '内饰做工不提了，心知肚明': 1,\n",
       "  '内饰也可以': 1,\n",
       "  '不如现款': 1,\n",
       "  '后排空间小了点': 1,\n",
       "  '内饰太老气': 1,\n",
       "  '内饰全塑料': 1,\n",
       "  '内饰低档': 1,\n",
       "  '用料不错': 1,\n",
       "  '伤感情': 1,\n",
       "  '对内饰要求不高': 1,\n",
       "  '特别不舒服': 1,\n",
       "  '发动机异味': 1,\n",
       "  '没用': 1,\n",
       "  '一般': 2,\n",
       "  '内饰贼嚣张': 1,\n",
       "  '齐全就行': 1,\n",
       "  '内饰改改': 1,\n",
       "  '简洁干练': 1,\n",
       "  '不满+E+E11:E742': 1,\n",
       "  '硬伤': 1,\n",
       "  '差一点': 1,\n",
       "  '好看，炫酷': 1,\n",
       "  '令人发指': 1,\n",
       "  '有些粗': 1,\n",
       "  '全': 1,\n",
       "  '高出一个档次': 1,\n",
       "  '还可以': 2,\n",
       "  '性能好': 1,\n",
       "  '带喜感': 1,\n",
       "  '味道真不小': 1,\n",
       "  '好点': 2,\n",
       "  '太扯了': 1,\n",
       "  '满意': 1,\n",
       "  '不够整': 1,\n",
       "  '不如': 1,\n",
       "  '渣': 1,\n",
       "  '没戏': 1,\n",
       "  '好不少': 1,\n",
       "  '有点难受': 1,\n",
       "  '大': 1,\n",
       "  '不豪华': 1,\n",
       "  '太差': 2,\n",
       "  '比较差': 1,\n",
       "  '外形好看': 1,\n",
       "  '内饰太过简单粗糟   ': 1,\n",
       "  '比森林人舒服多了': 1,\n",
       "  '外观奇丑无比': 1,\n",
       "  '外观太小气': 1,\n",
       "  '外观真心赞': 1,\n",
       "  '还不如十万块的国产车': 1,\n",
       "  '细': 1,\n",
       "  '骚': 1,\n",
       "  '扯淡': 1,\n",
       "  '低': 1,\n",
       "  '廉价': 2,\n",
       "  '吐槽': 1,\n",
       "  '掉渣': 1,\n",
       "  '秒杀': 2,\n",
       "  '胜出': 1,\n",
       "  '提升': 1,\n",
       "  '不咋地': 3,\n",
       "  '帅': 1,\n",
       "  '制冷快': 1,\n",
       "  '新': 1,\n",
       "  '简约的很': 1,\n",
       "  '不敢恭维': 2,\n",
       "  '比较好': 1,\n",
       "  '气味呛人': 1,\n",
       "  '惨不忍睹': 1,\n",
       "  '设计上也是不错': 1,\n",
       "  '还过的去': 1,\n",
       "  '非常一般': 1,\n",
       "  '落后': 1,\n",
       "  '一股丰田味': 1,\n",
       "  '有提升': 1,\n",
       "  '不咋地 ': 1,\n",
       "  '后排座椅 格叽格叽响': 1,\n",
       "  '有点糙': 1,\n",
       "  '断轴': 1,\n",
       "  '方向盘比较轻': 1,\n",
       "  '内饰有廉价感': 1,\n",
       "  '操纵好，底盘高': 1,\n",
       "  '内饰一般': 1,\n",
       "  '非常好': 1,\n",
       "  '油耗低': 1,\n",
       "  '不高': 1,\n",
       "  '比森林人强不少': 1,\n",
       "  '空间也大': 1,\n",
       "  '不像换代应有的水平': 1,\n",
       "  '多丑': 1,\n",
       "  '吊打': 1,\n",
       "  '配置高': 1,\n",
       "  '一般般': 1,\n",
       "  '更好': 1,\n",
       "  '有啥用': 1,\n",
       "  '设计感土土的': 1,\n",
       "  '居然': 1,\n",
       "  '真的不错': 1,\n",
       "  '不舒坦': 1,\n",
       "  '粗旷': 1,\n",
       "  '寒酸': 1,\n",
       "  '高档': 1,\n",
       "  '很好': 1,\n",
       "  '劣质材料': 1,\n",
       "  '侧面开裂外': 1,\n",
       "  '提高档次': 1,\n",
       "  '不是喜欢的': 1,\n",
       "  '漂亮': 1,\n",
       "  '不如人': 1,\n",
       "  '足够': 1,\n",
       "  '异响': 1,\n",
       "  '涉水': 1,\n",
       "  '可调': 1,\n",
       "  '内饰不错': 1,\n",
       "  '内饰老气': 1,\n",
       "  '舒服': 1,\n",
       "  ' 低': 1,\n",
       "  '座位太低': 1,\n",
       "  '很赞': 1,\n",
       "  '还行': 1,\n",
       "  '何来性能': 1,\n",
       "  '座椅开裂': 1,\n",
       "  '设计感太落后': 1,\n",
       "  '没办法吐槽': 1,\n",
       "  '前脸倒是2018款的好看': 1,\n",
       "  '完胜': 1,\n",
       "  '简单': 1,\n",
       "  '难看': 1,\n",
       "  '很棒': 1,\n",
       "  '垃圾': 1,\n",
       "  '恼火': 1},\n",
       " 2: {'省油': 6,\n",
       "  '灵敏': 1,\n",
       "  '反应慢': 1,\n",
       "  '熄火': 2,\n",
       "  '肉': 3,\n",
       "  '不佳': 1,\n",
       "  '舒服': 2,\n",
       "  '烧机油': 81,\n",
       "  '没有烧机油': 1,\n",
       "  '运气好': 1,\n",
       "  '不烧机油': 4,\n",
       "  '郁闷': 1,\n",
       "  '没那么明显': 1,\n",
       "  '纠结': 2,\n",
       "  '坑': 2,\n",
       "  '没有': 3,\n",
       "  '慢': 1,\n",
       "  '爆震': 8,\n",
       "  '很少': 1,\n",
       "  '怕': 2,\n",
       "  '失望': 4,\n",
       "  '没烧': 2,\n",
       "  '够用': 13,\n",
       "  '没': 1,\n",
       "  '耐用': 1,\n",
       "  '太吵': 1,\n",
       "  '鸡肋': 1,\n",
       "  '少': 1,\n",
       "  '好': 13,\n",
       "  '好像': 1,\n",
       "  '足': 2,\n",
       "  '没问题': 9,\n",
       "  '超车': 2,\n",
       "  '担心': 3,\n",
       "  '满意': 2,\n",
       "  '通病': 2,\n",
       "  '轻松': 2,\n",
       "  '不错': 11,\n",
       "  '不烧': 3,\n",
       "  '驾驶': 1,\n",
       "  '用不完的劲': 1,\n",
       "  '有点烧机油': 1,\n",
       "  '有点受不了': 1,\n",
       "  '不够': 5,\n",
       "  '够': 1,\n",
       "  '杠杠的': 3,\n",
       "  '烧': 2,\n",
       "  '破破烂烂的': 1,\n",
       "  '贵': 5,\n",
       "  '噪音': 3,\n",
       "  '就那么回事': 1,\n",
       "  '有问题': 1,\n",
       "  '好很很多': 1,\n",
       "  '隔音不好': 1,\n",
       "  '质量不行': 1,\n",
       "  '完全够用': 3,\n",
       "  '有一个缸体不工作，严重抖动': 1,\n",
       "  '可以': 2,\n",
       "  '啥都好': 1,\n",
       "  '不能比拟': 1,\n",
       "  '爽': 3,\n",
       "  '机油不好': 1,\n",
       "  '大': 3,\n",
       "  '差别没那么大': 1,\n",
       "  '颜值差': 1,\n",
       "  '全塑料': 1,\n",
       "  '有动力': 1,\n",
       "  '改观不少': 1,\n",
       "  '低': 5,\n",
       "  '打95分': 1,\n",
       "  '偏大': 1,\n",
       "  '不选': 1,\n",
       "  '高': 9,\n",
       "  '差': 6,\n",
       "  '代价大': 1,\n",
       "  '性价比高': 1,\n",
       "  '挺炫': 1,\n",
       "  '简单不豪华': 1,\n",
       "  '大改': 2,\n",
       "  '强': 3,\n",
       "  '大气': 1,\n",
       "  '不完善': 1,\n",
       "  '全时四驱': 3,\n",
       "  '一般': 2,\n",
       "  '升高': 1,\n",
       "  '担心不足': 1,\n",
       "  '加强': 1,\n",
       "  '一般般': 2,\n",
       "  '反应敏捷': 1,\n",
       "  '比不了': 1,\n",
       "  '很够用': 1,\n",
       "  '承受不起': 1,\n",
       "  '下降': 5,\n",
       "  '好看': 1,\n",
       "  '厉害': 1,\n",
       "  '好太多': 1,\n",
       "  '还行': 2,\n",
       "  '差的不是一点': 1,\n",
       "  '差的不是一点点': 1,\n",
       "  '足够': 3,\n",
       "  '还可以': 3,\n",
       "  '高原小霸王': 1,\n",
       "  '信心满满': 1,\n",
       "  '没落锁': 1,\n",
       "  '费油': 1,\n",
       "  '故障': 1,\n",
       "  '异响': 5,\n",
       "  '弱': 2,\n",
       "  '增加': 1,\n",
       "  '渗油': 2,\n",
       "  '温度高': 1,\n",
       "  '漏油': 5,\n",
       "  '设计缺陷': 1,\n",
       "  '没情况': 1,\n",
       "  '很爽': 2,\n",
       "  '机油消耗': 1,\n",
       "  '无损耗': 1,\n",
       "  '抖动': 1,\n",
       "  '省心': 2,\n",
       "  '换油': 1,\n",
       "  '生锈': 1,\n",
       "  '噪音偏大': 1,\n",
       "  '伤蓄电池': 1,\n",
       "  '重心低': 1,\n",
       "  '避免': 1,\n",
       "  '没关系': 1,\n",
       "  '不漏': 1,\n",
       "  '涉水': 2,\n",
       "  '锁死': 1,\n",
       "  '难点': 1,\n",
       "  '查不出': 1,\n",
       "  '哗啦声': 1,\n",
       "  '不敢': 1,\n",
       "  '败笔': 1,\n",
       "  '缺陷': 1,\n",
       "  '转速低': 1,\n",
       "  '负担大': 1,\n",
       "  '轰轰响': 1,\n",
       "  '噪音较大': 1,\n",
       "  '用的很好': 1,\n",
       "  '均衡': 1,\n",
       "  '牛车': 1,\n",
       "  '油耗上升': 1,\n",
       "  '绝对值': 1,\n",
       "  '动力不够': 2,\n",
       "  '降低动力': 1,\n",
       "  '空间不行': 2,\n",
       "  '别纠结动力了': 1,\n",
       "  '发动机通红': 1,\n",
       "  '轻盈': 1,\n",
       "  '压力大': 1,\n",
       "  '都行': 1,\n",
       "  '刺拉拉': 2,\n",
       "  '动力下降': 1,\n",
       "  '爆缸': 2,\n",
       "  '很舒适': 1,\n",
       "  '不行': 5,\n",
       "  '平顺自然': 1,\n",
       "  '重新打胶': 1,\n",
       "  '高了好多': 1,\n",
       "  '空间大动力可以': 1,\n",
       "  '优': 1,\n",
       "  '正常': 3,\n",
       "  '加速缓慢': 1,\n",
       "  '声音变小': 1,\n",
       "  '内饰豪华': 1,\n",
       "  '内饰绝对豪华': 1,\n",
       "  '太弱': 3,\n",
       "  '内饰不咋地': 1,\n",
       "  '不说动力差': 1,\n",
       "  '操控性好很多': 1,\n",
       "  '没觉动力不够': 1,\n",
       "  '动力更好': 1,\n",
       "  '加速好点': 1,\n",
       "  '没事': 1,\n",
       "  '一样': 1,\n",
       "  '影响散热': 1,\n",
       "  '油耗增加': 1,\n",
       "  '不解有问题': 1,\n",
       "  '动力好点': 2,\n",
       "  '堪比跑车': 1,\n",
       "  '跟不上': 1,\n",
       "  '操控很满意': 1,\n",
       "  '刹车软': 1,\n",
       "  '相当给力': 1,\n",
       "  '可以忽略': 1,\n",
       "  '油耗会上升': 1,\n",
       "  '比以前快': 1,\n",
       "  '冒烟了': 1,\n",
       "  '加速轻轻松松': 1,\n",
       "  '弱了点': 1,\n",
       "  '优势明显': 1,\n",
       "  '可以保持': 1,\n",
       "  '没啥区别': 1,\n",
       "  '干净': 2,\n",
       "  '差不多': 5,\n",
       "  '好不到哪去': 1,\n",
       "  '油耗好': 1,\n",
       "  '噪音有所下降': 1,\n",
       "  '发动机温度高': 1,\n",
       "  '发动机噪声大': 1,\n",
       "  '油耗高': 1,\n",
       "  '内饰也可以': 1,\n",
       "  '没什么影响': 1,\n",
       "  '油耗下降': 1,\n",
       "  '兼顾了太多东西': 1,\n",
       "  '发动机存在问题': 1,\n",
       "  '布局限制': 1,\n",
       "  '不会觉得不够用': 1,\n",
       "  '动力不足': 1,\n",
       "  '适当保养': 1,\n",
       "  '空间不小': 1,\n",
       "  '疯狂': 1,\n",
       "  '一方面': 1,\n",
       "  '有点降低': 1,\n",
       "  '不如': 2,\n",
       "  '已经很好了': 1,\n",
       "  '发动机没力': 1,\n",
       "  '加速抖动': 1,\n",
       "  '没感觉': 2,\n",
       "  '发动机异味': 1,\n",
       "  '动力呵呵': 1,\n",
       "  '好不少': 1,\n",
       "  '非常猛': 1,\n",
       "  '没法比': 1,\n",
       "  '发动机一直是弊病': 1,\n",
       "  '拔尖': 1,\n",
       "  '追不上': 1,\n",
       "  '后段不行': 1,\n",
       "  '没有问题': 1,\n",
       "  '磨损': 1,\n",
       "  '不够用': 1,\n",
       "  '动力足': 1,\n",
       "  '别提了': 1,\n",
       "  '急加速不行': 1,\n",
       "  '发动机嘶哑': 1,\n",
       "  '急加速问题': 1,\n",
       "  '差别明显': 1,\n",
       "  '宽大舒适': 1,\n",
       "  '不足': 6,\n",
       "  '减少': 1,\n",
       "  '逊色': 1,\n",
       "  '比不过': 1,\n",
       "  '不满+E+E11:E742': 1,\n",
       "  '噪音减少': 1,\n",
       "  '降了': 1,\n",
       "  '不如之前': 1,\n",
       "  '硬伤': 1,\n",
       "  '不咋地': 1,\n",
       "  '颇高': 1,\n",
       "  '不好': 1,\n",
       "  '强些': 1,\n",
       "  '强劲': 3,\n",
       "  '好看，炫酷': 1,\n",
       "  '很差': 1,\n",
       "  '噪音大': 2,\n",
       "  '捉急': 1,\n",
       "  '差些': 1,\n",
       "  '差点': 1,\n",
       "  '强很多': 1,\n",
       "  '漂亮': 1,\n",
       "  '声音大': 1,\n",
       "  '受不了': 1,\n",
       "  '难看': 1,\n",
       "  '效果不错': 1,\n",
       "  '驾驶质感强': 1,\n",
       "  '喜欢': 3,\n",
       "  '风险很大': 1,\n",
       "  '是好车': 1,\n",
       "  '外观奇丑无比': 1,\n",
       "  '外观太丑': 1,\n",
       "  '不擅长': 1,\n",
       "  '太好了': 1,\n",
       "  '不好受': 1,\n",
       "  '不少机油': 1,\n",
       "  '老坏': 1,\n",
       "  '严重烧机油': 1,\n",
       "  '肯定烧': 1,\n",
       "  '最好': 2,\n",
       "  '偏弱': 1,\n",
       "  '车挺好': 1,\n",
       "  '很一般': 2,\n",
       "  '别指望': 1,\n",
       "  '毛病多': 1,\n",
       "  '性能好': 2,\n",
       "  '破百': 1,\n",
       "  '秒杀': 2,\n",
       "  '好一些': 1,\n",
       "  '好点': 1,\n",
       "  '偏磨': 1,\n",
       "  '够了': 2,\n",
       "  '胜出': 1,\n",
       "  '提升': 2,\n",
       "  '也不错': 1,\n",
       "  '无敌': 1,\n",
       "  '出色': 1,\n",
       "  '可靠': 2,\n",
       "  '有点性能': 1,\n",
       "  '没得说': 1,\n",
       "  '不会很快': 1,\n",
       "  '超过': 1,\n",
       "  '可以的': 1,\n",
       "  '更高': 1,\n",
       "  '性能高一点': 1,\n",
       "  '好性能': 1,\n",
       "  '动力牛': 1,\n",
       "  '理想': 1,\n",
       "  '发动机很新啊': 1,\n",
       "  '再贵的车也没有欧蓝德好': 1,\n",
       "  '将开启汽车动力的新篇章': 1,\n",
       "  '连速腾起步都赶不上': 1,\n",
       "  '离地间隙低': 1,\n",
       "  '四驱无敌': 1,\n",
       "  '水平对置发动机就是轻啊': 1,\n",
       "  '动力不行': 1,\n",
       "  '动力强多': 1,\n",
       "  '断轴': 1,\n",
       "  '变速箱大众调教得很差': 1,\n",
       "  '操纵好，底盘高': 1,\n",
       "  '油耗真心棒': 1,\n",
       "  '很高': 1,\n",
       "  '油耗水平已经很满意': 1,\n",
       "  '油耗也不高': 1,\n",
       "  '较大后备箱': 1,\n",
       "  '发动机噪音确实大': 1,\n",
       "  '更硬': 1,\n",
       "  '比森林人强不少': 1,\n",
       "  '维修起来麻烦': 1,\n",
       "  '老牛逼。': 1,\n",
       "  '高性能发动机': 1,\n",
       "  '发动机烧漏机油风险大': 1,\n",
       "  '发动机是好': 1,\n",
       "  '变速箱落后是真的': 1,\n",
       "  '水平对置发动机 全时四驱是最大卖点': 1,\n",
       "  '水平对置发动机原来这么稳': 1,\n",
       "  '没见过跑14万还有那么好的机器': 1,\n",
       "  '发动机实在太老': 1,\n",
       "  '比直列的要轻很多': 1,\n",
       "  '车漆软': 2,\n",
       "  '车漆真软': 1,\n",
       "  '空间大': 2,\n",
       "  '动力和空间太诱惑了！': 1,\n",
       "  '不为别的，就为了中控台和空间': 1,\n",
       "  '空间较大': 1,\n",
       "  '水平对置横向占用空间太': 1,\n",
       "  '配置高': 1,\n",
       "  '差很远': 1,\n",
       "  '猛': 2,\n",
       "  '着迷': 1,\n",
       "  '扯淡': 1,\n",
       "  '真的不错': 1,\n",
       "  '不后悔': 2,\n",
       "  '牛逼': 1,\n",
       "  '太给力了': 1,\n",
       "  '感觉不到': 1,\n",
       "  '心理没底': 1,\n",
       "  '挺稳的': 1,\n",
       "  '不介意': 1,\n",
       "  '算我输': 1,\n",
       "  '垃圾': 1,\n",
       "  '偶尔': 1,\n",
       "  '没烧过': 1,\n",
       "  '没烧过机油': 1,\n",
       "  '问题多': 1,\n",
       "  '很好': 1,\n",
       "  '说不清': 1,\n",
       "  '无力': 4,\n",
       "  '不费力': 1,\n",
       "  '有眼光': 1,\n",
       "  '不如人': 1,\n",
       "  '不贵': 1,\n",
       "  '不如别人': 1,\n",
       "  '美好': 1,\n",
       "  '疲惫': 1,\n",
       "  '不忍直视': 1,\n",
       "  '足够了': 1,\n",
       "  '超车没问题': 1,\n",
       "  '轻微损耗': 1,\n",
       "  '发愁': 1,\n",
       "  '消耗快': 1,\n",
       "  '没太多效果': 1,\n",
       "  '别动': 1,\n",
       "  '嗡嗡响': 2,\n",
       "  '麻烦大': 1,\n",
       "  '很好用': 1,\n",
       "  '成熟': 1,\n",
       "  '轻爆声': 1,\n",
       "  '进一步增大': 1,\n",
       "  '负荷大': 1,\n",
       "  '凑乎': 1,\n",
       "  '密封不好渗油': 1,\n",
       "  '动力太差': 1,\n",
       "  '其他很low': 1,\n",
       "  '一脸懵逼': 1,\n",
       "  '动力感明显': 1,\n",
       "  '也不高': 1,\n",
       "  '动力大': 1,\n",
       "  '另类': 1,\n",
       "  '抖动一会': 1,\n",
       "  '动力衰竭': 1,\n",
       "  '初段还行': 1,\n",
       "  '噪声大': 1,\n",
       "  '动力一般': 1,\n",
       "  '日常足够': 1,\n",
       "  '正常足够用': 1,\n",
       "  '没影响': 1,\n",
       "  '很给力': 1,\n",
       "  '淘汰的发动机': 1,\n",
       "  '有点后悔': 1,\n",
       "  '偏高': 1,\n",
       "  '不太足': 1,\n",
       "  '十足': 1,\n",
       "  '顺滑': 1,\n",
       "  '不给力': 1,\n",
       "  '风险极大': 1,\n",
       "  '加速不顺畅': 1,\n",
       "  '发动机没声音': 1,\n",
       "  '噪音大、内饰普通': 1,\n",
       "  '空调有问题': 1,\n",
       "  '见鬼了': 1,\n",
       "  '稳': 1,\n",
       "  '顺畅': 1,\n",
       "  '找虐': 1,\n",
       "  '较强': 1,\n",
       "  '不太喜欢': 1,\n",
       "  '何来性能': 1,\n",
       "  '抓地力肯定提高了': 1,\n",
       "  '制冷效果差': 1,\n",
       "  '没办法吐槽': 1,\n",
       "  '有点野性': 1,\n",
       "  '各方面优于森林人': 1,\n",
       "  '发动机有劲': 1,\n",
       "  '比现款更强的，拭目以待吧': 1,\n",
       "  '发动机比较low': 1,\n",
       "  '爱它的水平对置发动机': 1,\n",
       "  '车漆是真薄': 1,\n",
       "  '不用讨论': 1},\n",
       " 3: {'在意': 1,\n",
       "  '差点': 1,\n",
       "  '异响': 1,\n",
       "  '变色': 1,\n",
       "  '丑': 5,\n",
       "  '性价比': 1,\n",
       "  '薄': 2,\n",
       "  '好看': 13,\n",
       "  '逼格高': 1,\n",
       "  '不好看': 4,\n",
       "  '喜欢': 5,\n",
       "  '特别迷恋': 1,\n",
       "  '小': 1,\n",
       "  '颜值差': 1,\n",
       "  '难看': 4,\n",
       "  ' 好看': 1,\n",
       "  '很好': 1,\n",
       "  '认可度高': 1,\n",
       "  '高': 1,\n",
       "  '尊贵': 1,\n",
       "  '挺炫': 1,\n",
       "  '中庸': 1,\n",
       "  '完败': 1,\n",
       "  '要求一致': 1,\n",
       "  '时尚': 1,\n",
       "  '大改': 2,\n",
       "  '省油': 2,\n",
       "  '大气': 1,\n",
       "  '下不来': 1,\n",
       "  '很够用': 1,\n",
       "  '普通': 1,\n",
       "  '差': 2,\n",
       "  '掰不动': 1,\n",
       "  '作用不大': 1,\n",
       "  '降低': 1,\n",
       "  '纠结': 1,\n",
       "  '凶狠的前脸': 1,\n",
       "  '高度都一样': 1,\n",
       "  '对口味': 1,\n",
       "  '不纠结': 1,\n",
       "  '看中外观': 1,\n",
       "  '  ': 1,\n",
       "  '高配好看': 1,\n",
       "  '外观不错': 2,\n",
       "  '视野好': 1,\n",
       "  '底盘厉害': 1,\n",
       "  '硬伤': 2,\n",
       "  '不错': 3,\n",
       "  '漂亮': 4,\n",
       "  '大': 1,\n",
       "  '合眼缘': 1,\n",
       "  '好': 2,\n",
       "  '倾斜大': 1,\n",
       "  '里外不一': 1,\n",
       "  '摇晃': 1,\n",
       "  '平庸': 1,\n",
       "  '越丑': 1,\n",
       "  '不符合': 1,\n",
       "  '不协调': 2,\n",
       "  '真难看': 2,\n",
       "  '好难看': 1,\n",
       "  '外形好看': 1,\n",
       "  '上了一个档次了，隐隐有豪华车的感觉': 1,\n",
       "  '外观没有丝毫质感！    ': 1,\n",
       "  '外观奇丑无比': 1,\n",
       "  '外观太小气': 1,\n",
       "  '外观太丑': 1,\n",
       "  '外观真心赞': 1,\n",
       "  '还不如十万块的国产车': 1,\n",
       "  '一次没修过': 1,\n",
       "  '挫样': 1,\n",
       "  '秀气': 1,\n",
       "  '接受不了': 1,\n",
       "  '廉价': 1,\n",
       "  '秒杀': 1,\n",
       "  '不在意': 1,\n",
       "  '软': 2,\n",
       "  '厚': 1,\n",
       "  '不敢恭维': 1,\n",
       "  '强化': 1,\n",
       "  '有点土': 1,\n",
       "  '硬朗': 1,\n",
       "  '一股丰田味': 1,\n",
       "  '无所谓': 1,\n",
       "  '不咋地 ': 1,\n",
       "  '这么看确实大气点': 1,\n",
       "  '无力吐槽': 1,\n",
       "  '颜色和光线还可以': 1,\n",
       "  '看着真舒服': 1,\n",
       "  '觉得19款没有老款好看': 1,\n",
       "  '很耐看': 1,\n",
       "  '大气，中规中矩！': 1,\n",
       "  '外形至少本人觉得新款还可以': 1,\n",
       "  '拼凑杀马特': 1,\n",
       "  '尾灯好丑': 1,\n",
       "  '史上最难看': 1,\n",
       "  '低调': 1,\n",
       "  '油耗低': 1,\n",
       "  '不高': 1,\n",
       "  '水平对置发动机 全时四驱是最大卖点': 1,\n",
       "  '比直列的要轻很多': 1,\n",
       "  '车漆薄': 1,\n",
       "  '日系车漆软': 1,\n",
       "  '车漆软': 2,\n",
       "  '车漆真软': 1,\n",
       "  '喜欢这种前脸': 1,\n",
       "  '前脸设计看着精致，协调了': 1,\n",
       "  '前脸无爱': 1,\n",
       "  '镀铬太多 让人尴尬': 1,\n",
       "  '前脸更好看': 1,\n",
       "  '后排太硬。': 1,\n",
       "  '空间也大': 1,\n",
       "  '不像换代应有的水平': 1,\n",
       "  '多丑': 1,\n",
       "  '吊打': 1,\n",
       "  '配置高': 1,\n",
       "  '个人觉得比较好看': 1,\n",
       "  '不怎么好看': 1,\n",
       "  '帅气~': 1,\n",
       "  '社会社会！': 1,\n",
       "  '低级无下限': 1,\n",
       "  '新款厚重大气': 1,\n",
       "  '真的不错': 1,\n",
       "  '仍觉得好': 1,\n",
       "  '仁者见仁': 1,\n",
       "  '耐看': 1,\n",
       "  '耐脏': 1,\n",
       "  '不耐脏': 1,\n",
       "  '更好看': 1,\n",
       "  '不如别人': 1,\n",
       "  '我忍了': 1,\n",
       "  '清洗': 1,\n",
       "  '没有完美的只有适用的': 1,\n",
       "  '大头娃娃': 1,\n",
       "  '糊弄': 1,\n",
       "  '更有层次感': 1,\n",
       "  '失望': 1,\n",
       "  '不一致': 1,\n",
       "  '更霸气': 1,\n",
       "  '不喜欢': 1,\n",
       "  '车漆是真薄': 1,\n",
       "  '前脸倒是2018款的好看': 1,\n",
       "  '完胜': 1,\n",
       "  '莫名妖娆尾灯的混合体': 1,\n",
       "  '太国产了': 1},\n",
       " 4: {'耐用': 1,\n",
       "  '软': 3,\n",
       "  '不强': 1,\n",
       "  '差劲': 1,\n",
       "  '不耐磨': 1,\n",
       "  '刹不住、软': 1,\n",
       "  '心寒': 1,\n",
       "  '刹不住': 1,\n",
       "  '刹车软，刹车行程距离长': 1,\n",
       "  '不行': 2,\n",
       "  '不给力': 1,\n",
       "  '有问题': 1,\n",
       "  '不错': 4,\n",
       "  '认可度高': 1,\n",
       "  '要求一致': 1,\n",
       "  '没落锁': 1,\n",
       "  '安全': 2,\n",
       "  '第一': 1,\n",
       "  '高': 2,\n",
       "  '全时四驱': 2,\n",
       "  '强': 1,\n",
       "  '刹车软': 2,\n",
       "  '省油': 1,\n",
       "  '甩三条街': 1,\n",
       "  '便宜': 1,\n",
       "  '没宽敞': 1,\n",
       "  '超好用': 1,\n",
       "  '最适合': 1,\n",
       "  '恼人': 1,\n",
       "  '较好': 1,\n",
       "  '有效果': 1,\n",
       "  '空间大': 1,\n",
       "  '急刹车': 1,\n",
       "  '风力强劲': 1,\n",
       "  '喜欢他的机械手刹': 1,\n",
       "  '刹车怎么这么软呢  ': 1,\n",
       "  '卤素灯明显穿透力更强一些': 1,\n",
       "  '急加速': 1,\n",
       "  '设计要是再简练一些就好了': 1,\n",
       "  '高度只有180mm': 1,\n",
       "  '油耗真心棒': 1,\n",
       "  '最低油耗5.9驾驶体验超级棒': 1,\n",
       "  '更硬': 1,\n",
       "  '刹车脚感非常细腻，适合微操': 1,\n",
       "  '还完刹车线性了，随踩随有挺好   ': 1,\n",
       "  '避震我觉得挺好，够硬': 1,\n",
       "  '很高': 1,\n",
       "  '摆动': 1,\n",
       "  '无奈': 1,\n",
       "  '不让人省心': 1,\n",
       "  '算我输': 1,\n",
       "  '不太安全': 1,\n",
       "  '减配': 1,\n",
       "  '没质感': 1,\n",
       "  '好': 1,\n",
       "  '烂': 1,\n",
       "  '破': 1,\n",
       "  '可靠': 1,\n",
       "  '强度': 1,\n",
       "  '老化': 1,\n",
       "  '高很多': 1,\n",
       "  '不费力': 1,\n",
       "  '剎不住啊剎不住': 1,\n",
       "  '刹车异响': 1,\n",
       "  '喜欢': 1,\n",
       "  '结实': 1,\n",
       "  '差': 1,\n",
       "  '过硬': 1,\n",
       "  '很少出现': 1,\n",
       "  '很赞': 1,\n",
       "  '差很多': 1,\n",
       "  '座椅开裂': 1,\n",
       "  '没办法吐槽': 1,\n",
       "  '有点野性': 1,\n",
       "  '都不错   ': 1,\n",
       "  '不爽': 1},\n",
       " 5: {'还行': 1,\n",
       "  '满意': 8,\n",
       "  '省油': 20,\n",
       "  '不省油': 3,\n",
       "  '略高': 1,\n",
       "  '知足': 1,\n",
       "  '还得': 1,\n",
       "  '不高': 3,\n",
       "  '低': 15,\n",
       "  '没感觉': 1,\n",
       "  '差不多': 2,\n",
       "  '高': 12,\n",
       "  '没增加': 1,\n",
       "  '足': 1,\n",
       "  '市区': 7,\n",
       "  '左右': 2,\n",
       "  '不公平': 1,\n",
       "  '全行业造假': 1,\n",
       "  '真不喜欢': 1,\n",
       "  '费油': 10,\n",
       "  '不错': 5,\n",
       "  '够': 1,\n",
       "  '大': 4,\n",
       "  '颜值差': 1,\n",
       "  '偏大': 1,\n",
       "  '完败': 2,\n",
       "  '烧机油': 2,\n",
       "  '差': 2,\n",
       "  '可以': 1,\n",
       "  '没问题': 1,\n",
       "  '升高': 1,\n",
       "  '噪音大': 1,\n",
       "  '噪音小': 1,\n",
       "  '增加': 5,\n",
       "  '油耗大': 2,\n",
       "  '正常': 4,\n",
       "  '油耗上升': 1,\n",
       "  '无毛病': 1,\n",
       "  '没有任何油耗现象': 1,\n",
       "  '平顺自然': 1,\n",
       "  '也不高': 1,\n",
       "  '好一些': 1,\n",
       "  '没必要做': 1,\n",
       "  '给力': 2,\n",
       "  '额外惊喜': 1,\n",
       "  '哈哈': 1,\n",
       "  '油耗增加': 2,\n",
       "  '油耗会上升': 1,\n",
       "  '没有任何问题': 1,\n",
       "  '没啥区别': 1,\n",
       "  '油耗好': 1,\n",
       "  '油耗高': 1,\n",
       "  '内饰也可以': 1,\n",
       "  '顶不住': 1,\n",
       "  '不错的': 1,\n",
       "  '油耗下降': 1,\n",
       "  '不会高': 2,\n",
       "  '很低': 1,\n",
       "  '油耗低一些': 1,\n",
       "  '油耗太高': 1,\n",
       "  '油耗造假': 1,\n",
       "  '增加油耗': 1,\n",
       "  '油耗高一个': 1,\n",
       "  '没怎么关心': 1,\n",
       "  '油耗有上升': 1,\n",
       "  '稳定': 1,\n",
       "  '油耗哗哗的': 1,\n",
       "  '无变化': 1,\n",
       "  '比以前多': 1,\n",
       "  '还可以': 2,\n",
       "  '省油的': 1,\n",
       "  '油耗很高': 2,\n",
       "  '不可能低': 1,\n",
       "  '高了点': 1,\n",
       "  '高点': 1,\n",
       "  '颇高': 1,\n",
       "  '高很多': 1,\n",
       "  '特大': 1,\n",
       "  '废油': 1,\n",
       "  '惊喜': 1,\n",
       "  '大不少': 1,\n",
       "  '贼高': 1,\n",
       "  '效果不错': 1,\n",
       "  '不好': 1,\n",
       "  '恼人': 1,\n",
       "  '相当棒': 1,\n",
       "  '非常满意': 1,\n",
       "  '油耗蹭蹭的往上涨': 1,\n",
       "  '性价比低': 1,\n",
       "  '油耗不好控制': 1,\n",
       "  '耐用': 1,\n",
       "  '油耗没那么高': 1,\n",
       "  '太高': 1,\n",
       "  '值得拥有': 1,\n",
       "  '确实省': 1,\n",
       "  '胜出': 1,\n",
       "  '稍微高点': 1,\n",
       "  '高一点': 1,\n",
       "  '大些': 1,\n",
       "  '够可以': 1,\n",
       "  '油耗低': 5,\n",
       "  '这油耗牛啊': 1,\n",
       "  '方向盘比较轻': 1,\n",
       "  '操纵好，底盘高': 1,\n",
       "  '2.0森市区油耗真心不错，': 1,\n",
       "  '油耗真心棒': 1,\n",
       "  '最低油耗5.9驾驶体验超级棒': 1,\n",
       "  '低调': 1,\n",
       "  '非常好': 1,\n",
       "  '很高': 1,\n",
       "  '省油不少 ': 1,\n",
       "  '油耗确实很高': 1,\n",
       "  '油耗水平已经很满意': 1,\n",
       "  '油耗也不高': 1,\n",
       "  '后排太硬。': 1,\n",
       "  '一般般': 1,\n",
       "  '爱咋滴就咋滴': 1,\n",
       "  '不敢恭维': 1,\n",
       "  '不着调': 1,\n",
       "  '不见得省': 1,\n",
       "  '不一定费油': 1,\n",
       "  '才': 1,\n",
       "  '省': 1,\n",
       "  '城市': 1,\n",
       "  '省油环保': 1,\n",
       "  '不高、经济': 1,\n",
       "  '不低': 1,\n",
       "  '推荐': 1,\n",
       "  '结实': 1,\n",
       "  '谁买谁后悔': 1,\n",
       "  '油耗奇高': 1,\n",
       "  '油耗升了': 1,\n",
       "  '油耗不低': 1,\n",
       "  '油耗会下来': 1,\n",
       "  '真高': 1,\n",
       "  '特费油': 1,\n",
       "  '不靠谱': 1,\n",
       "  '较低': 1,\n",
       "  '很满意': 1,\n",
       "  '不费油': 1,\n",
       "  '稍微低': 1,\n",
       "  '心真累': 1,\n",
       "  '有点儿高': 1,\n",
       "  '耍流氓': 1,\n",
       "  '神奇无比': 1,\n",
       "  '不必在乎': 1},\n",
       " 6: {'好些': 1,\n",
       "  '满足': 1,\n",
       "  '小': 12,\n",
       "  '都不错': 1,\n",
       "  '大': 20,\n",
       "  '硬伤': 2,\n",
       "  '真不喜欢': 1,\n",
       "  '宽敞': 2,\n",
       "  '不好看': 1,\n",
       "  '后排挤': 1,\n",
       "  '不行': 1,\n",
       "  '烧机油': 1,\n",
       "  '坐进去最放松的': 1,\n",
       "  '钻进去': 1,\n",
       "  '好': 3,\n",
       "  '不感冒': 1,\n",
       "  '大 ': 1,\n",
       "  '别奢望': 1,\n",
       "  '出色': 1,\n",
       "  '面包车': 1,\n",
       "  '省油': 1,\n",
       "  '杀手锏': 1,\n",
       "  '差别没那么大': 1,\n",
       "  '不错': 2,\n",
       "  '全塑料': 1,\n",
       "  '难看': 1,\n",
       "  '有动力': 1,\n",
       "  '车体高': 1,\n",
       "  '均衡': 1,\n",
       "  '差不多': 1,\n",
       "  '舒服': 4,\n",
       "  '空间不行': 3,\n",
       "  '实用': 1,\n",
       "  '豪华感强': 1,\n",
       "  '各方面都好些': 1,\n",
       "  '不差': 1,\n",
       "  '舒服的很': 1,\n",
       "  '有保障': 1,\n",
       "  '空间大动力可以': 1,\n",
       "  '内饰豪华': 1,\n",
       "  '内饰绝对豪华': 1,\n",
       "  '空间大': 6,\n",
       "  '非常好': 1,\n",
       "  '空间有点': 1,\n",
       "  '不敢恭维': 1,\n",
       "  '后排空间小了点': 1,\n",
       "  '内饰低档': 1,\n",
       "  '空间不小': 1,\n",
       "  '特别不舒服': 1,\n",
       "  '容量大': 1,\n",
       "  '只要是空间小': 1,\n",
       "  '油耗低一些': 1,\n",
       "  '宽大舒适': 1,\n",
       "  '特大': 1,\n",
       "  '不满+E+E11:E742': 1,\n",
       "  '噪音减少': 1,\n",
       "  '小一点': 2,\n",
       "  '低': 2,\n",
       "  '差一点': 1,\n",
       "  '甩三条街': 1,\n",
       "  '合眼缘': 1,\n",
       "  '没宽敞': 1,\n",
       "  '有优势': 1,\n",
       "  '差': 1,\n",
       "  '更方便': 1,\n",
       "  '杠杠的': 1,\n",
       "  '挺好的': 1,\n",
       "  '喜欢': 1,\n",
       "  '还行': 1,\n",
       "  '外形好看': 1,\n",
       "  '比森林人舒服多了': 1,\n",
       "  '外观没有丝毫质感！    ': 1,\n",
       "  '不小': 1,\n",
       "  '有异响': 2,\n",
       "  '太小': 1,\n",
       "  '高点': 1,\n",
       "  '胜出': 1,\n",
       "  '大了很多': 1,\n",
       "  '完败': 1,\n",
       "  '漂亮': 1,\n",
       "  '抓狂': 1,\n",
       "  '不咋地': 1,\n",
       "  '理想': 1,\n",
       "  '是大的': 1,\n",
       "  '大空间': 1,\n",
       "  '轴距和空间严重不符': 1,\n",
       "  '坐姿高视线更好': 1,\n",
       "  '高度只有180mm': 1,\n",
       "  '低调': 1,\n",
       "  '较大后备箱': 1,\n",
       "  '比森林人强不少': 1,\n",
       "  '空间坐姿都好点': 1,\n",
       "  '动力和空间太诱惑了！': 1,\n",
       "  '不为别的，就为了中控台和空间': 1,\n",
       "  '车内空间蛮大': 1,\n",
       "  '操控很满意': 1,\n",
       "  '空间较大': 1,\n",
       "  '空间也大': 1,\n",
       "  '更好': 1,\n",
       "  '小了点': 1,\n",
       "  '太小了': 1,\n",
       "  '比不了': 1,\n",
       "  '异响': 1,\n",
       "  '不如': 2,\n",
       "  '卖点': 1,\n",
       "  '足够': 1,\n",
       "  '空间足够': 1,\n",
       "  '太挤了': 1,\n",
       "  '座位太低': 1,\n",
       "  '更霸气': 1,\n",
       "  '还小': 1,\n",
       "  '舒适度也很高': 1,\n",
       "  '各方面优于森林人': 1},\n",
       " 7: {'静音': 5,\n",
       "  '原厂': 1,\n",
       "  '空调': 5,\n",
       "  '大': 6,\n",
       "  '坑': 1,\n",
       "  '异响': 25,\n",
       "  '噪音': 15,\n",
       "  '噪音大': 14,\n",
       "  '小': 2,\n",
       "  '好': 8,\n",
       "  '颠': 1,\n",
       "  '不舒服': 3,\n",
       "  '有问题打不开': 1,\n",
       "  '乱响': 1,\n",
       "  '宽敞': 1,\n",
       "  '卡顿': 1,\n",
       "  '烧机油': 10,\n",
       "  '不咋地': 1,\n",
       "  '质量很差': 1,\n",
       "  '腰疼': 1,\n",
       "  '抖': 2,\n",
       "  '气门声音大    ': 1,\n",
       "  '响': 18,\n",
       "  '空调很牛逼': 1,\n",
       "  '隔音不好': 1,\n",
       "  '可以': 1,\n",
       "  '忽悠 容易晕车': 1,\n",
       "  '喜欢': 1,\n",
       "  '唯一不足': 1,\n",
       "  '大 ': 1,\n",
       "  '无法比': 1,\n",
       "  '颜值差': 1,\n",
       "  '改观不少': 1,\n",
       "  '看中座椅记忆': 1,\n",
       "  '丑': 1,\n",
       "  '提升太多': 1,\n",
       "  '差': 2,\n",
       "  '省油': 1,\n",
       "  '没问题': 1,\n",
       "  '噪音小': 3,\n",
       "  '没落锁': 1,\n",
       "  '声音很大': 1,\n",
       "  '不是很舒适': 1,\n",
       "  '颠簸': 1,\n",
       "  '有点影响': 1,\n",
       "  '有味': 1,\n",
       "  '转速低': 1,\n",
       "  '噪音较大': 1,\n",
       "  '放弃': 1,\n",
       "  '噪音不大': 1,\n",
       "  '不行': 1,\n",
       "  '较差': 1,\n",
       "  '有噪音': 2,\n",
       "  '受不了': 2,\n",
       "  '味特别大': 1,\n",
       "  '恒温': 1,\n",
       "  '有声音': 1,\n",
       "  '空调不咋样': 1,\n",
       "  '冻得': 1,\n",
       "  '空间不行': 1,\n",
       "  '耳朵不好了': 1,\n",
       "  '正常': 4,\n",
       "  '动力好点': 1,\n",
       "  '音响太差': 1,\n",
       "  '刹车软': 1,\n",
       "  '忽略不计': 1,\n",
       "  '可以忽略': 1,\n",
       "  '比以前快': 1,\n",
       "  '制冷效果好': 1,\n",
       "  '掩盖噪音': 1,\n",
       "  '油耗好': 1,\n",
       "  '噪音有所下降': 1,\n",
       "  '开空调滴水': 1,\n",
       "  '滴水现象': 1,\n",
       "  '一般': 1,\n",
       "  '很冷': 1,\n",
       "  '不给力': 1,\n",
       "  '油耗高': 1,\n",
       "  '变冰箱': 1,\n",
       "  '确实不好': 1,\n",
       "  '油耗下降': 1,\n",
       "  '适当保养': 1,\n",
       "  '空间不小': 1,\n",
       "  '底盘漏油': 1,\n",
       "  '很低': 1,\n",
       "  '满意': 1,\n",
       "  '速度快噪音大': 1,\n",
       "  '没用': 1,\n",
       "  '无变化': 1,\n",
       "  '开空调没劲': 1,\n",
       "  '比不过': 1,\n",
       "  '噪音减少': 1,\n",
       "  '有点提升': 1,\n",
       "  '差一点': 1,\n",
       "  '低': 1,\n",
       "  '声音大': 2,\n",
       "  '不错': 4,\n",
       "  '噪音依旧': 1,\n",
       "  '抖抖': 1,\n",
       "  '大不少': 1,\n",
       "  '好听': 1,\n",
       "  '好用': 1,\n",
       "  '难受': 1,\n",
       "  '不方便': 1,\n",
       "  '发出声音': 1,\n",
       "  '倾斜大': 1,\n",
       "  '好点': 1,\n",
       "  '还可以': 1,\n",
       "  '蛮好': 1,\n",
       "  '风噪最大': 1,\n",
       "  '比较差': 1,\n",
       "  '原胎声音我是受不了': 1,\n",
       "  '不严实 ': 1,\n",
       "  '有异响': 2,\n",
       "  '给力': 3,\n",
       "  '没有': 1,\n",
       "  '出问题': 2,\n",
       "  '偏低': 1,\n",
       "  '硬': 1,\n",
       "  '不严实': 1,\n",
       "  '高': 2,\n",
       "  '毛病多': 1,\n",
       "  '风噪大': 1,\n",
       "  '可以了': 1,\n",
       "  '很好用': 3,\n",
       "  '滋滋声': 1,\n",
       "  '土腥味': 1,\n",
       "  '特别抖': 1,\n",
       "  '制冷快': 1,\n",
       "  '抖动': 1,\n",
       "  '大些': 1,\n",
       "  '摩擦音': 1,\n",
       "  '气味呛人': 1,\n",
       "  '非常好': 2,\n",
       "  '风力强劲': 1,\n",
       "  '噪音低': 1,\n",
       "  '异响比较多': 1,\n",
       "  '坐姿高视线更好': 1,\n",
       "  '后排座椅 格叽格叽响': 1,\n",
       "  '有点糙': 1,\n",
       "  '断轴': 1,\n",
       "  '方向盘比较轻': 1,\n",
       "  '隔音还不如森': 1,\n",
       "  '底盘不是很紧凑 ': 1,\n",
       "  '高度只有180mm': 1,\n",
       "  '2.0森市区油耗真心不错，': 1,\n",
       "  '最低油耗5.9驾驶体验超级棒': 1,\n",
       "  '油耗低': 1,\n",
       "  '不高': 1,\n",
       "  '油耗水平已经很满意': 1,\n",
       "  '有金属敲击的声音': 1,\n",
       "  '风噪真心大': 1,\n",
       "  '风噪是大了点': 1,\n",
       "  '发动机噪音确实大': 1,\n",
       "  '隔音效果一般': 1,\n",
       "  '静音舒适': 1,\n",
       "  '空间大': 2,\n",
       "  '空间坐姿都好点': 1,\n",
       "  '动力和空间太诱惑了！': 1,\n",
       "  '操控很满意': 1,\n",
       "  '空间较大': 1,\n",
       "  '后排太硬。': 1,\n",
       "  '颈椎有点累 ': 1,\n",
       "  'X1的座椅小的可怜': 1,\n",
       "  '一般般': 1,\n",
       "  '还行吧': 1,\n",
       "  '扯淡': 1,\n",
       "  '不介意': 1,\n",
       "  '很差': 1,\n",
       "  '凉': 1,\n",
       "  '质量好': 1,\n",
       "  '有一点噪音': 1,\n",
       "  '真不错': 1,\n",
       "  '有提高了': 1,\n",
       "  '不如别人': 1,\n",
       "  '有点硬': 1,\n",
       "  '风噪音': 1,\n",
       "  '不能忍': 1,\n",
       "  ' 噪音大': 1,\n",
       "  '免疫': 1,\n",
       "  '空调不行': 1,\n",
       "  '噪音稍微好些': 1,\n",
       "  '进一步增大': 1,\n",
       "  '基本上没噪音了': 1,\n",
       "  '不贵': 1,\n",
       "  '没办法': 2,\n",
       "  '麻烦': 1,\n",
       "  '噪声大': 1,\n",
       "  '别太在意': 1,\n",
       "  '舒服': 1,\n",
       "  '不小': 1,\n",
       "  '奇葩': 1,\n",
       "  '非常差': 1,\n",
       "  '操控更好': 1,\n",
       "  '问题': 1,\n",
       "  '坏': 2,\n",
       "  '强': 1,\n",
       "  '土味': 1,\n",
       "  '何来性能': 1,\n",
       "  '风噪大很多': 1,\n",
       "  '制冷效果差': 1,\n",
       "  '舒适度也很高': 1,\n",
       "  '不错，无噪音': 1,\n",
       "  '优点安静': 1,\n",
       "  '有点儿高': 1,\n",
       "  '费劲': 1,\n",
       "  '差点': 1},\n",
       " 8: {'平衡': 3,\n",
       "  '一流': 1,\n",
       "  '追求': 1,\n",
       "  '良好': 1,\n",
       "  '第一': 1,\n",
       "  '灵活': 2,\n",
       "  '垃圾': 1,\n",
       "  '很好': 4,\n",
       "  '完胜': 1,\n",
       "  '好用': 1,\n",
       "  '大': 4,\n",
       "  '换': 1,\n",
       "  '规格': 1,\n",
       "  '专业': 1,\n",
       "  '漂移': 1,\n",
       "  '没敢玩': 1,\n",
       "  '不错': 12,\n",
       "  '不给力': 1,\n",
       "  '好感': 1,\n",
       "  '就那么回事': 1,\n",
       "  '渗油': 1,\n",
       "  '烧机油': 1,\n",
       "  '可以': 2,\n",
       "  '喜欢': 5,\n",
       "  '不如': 1,\n",
       "  '无法比': 2,\n",
       "  '颜值差': 1,\n",
       "  '打95分': 1,\n",
       "  '高': 4,\n",
       "  '好看': 1,\n",
       "  '改了也不够': 1,\n",
       "  '完败': 1,\n",
       "  '舒服': 5,\n",
       "  '要求一致': 1,\n",
       "  '大改': 1,\n",
       "  '下不来': 1,\n",
       "  '值得': 1,\n",
       "  '不飘': 1,\n",
       "  '好': 9,\n",
       "  '一般': 2,\n",
       "  '比不了': 1,\n",
       "  '承受不起': 1,\n",
       "  '厉害': 2,\n",
       "  '灵巧': 1,\n",
       "  '没问题': 3,\n",
       "  '省油': 3,\n",
       "  '异响': 2,\n",
       "  '偏硬': 1,\n",
       "  '静音不好': 1,\n",
       "  '更硬': 1,\n",
       "  '很韧性': 1,\n",
       "  '优势': 2,\n",
       "  '很稳': 1,\n",
       "  '更好': 2,\n",
       "  '很准': 1,\n",
       "  '好点': 1,\n",
       "  '操控感': 2,\n",
       "  '换代慢': 1,\n",
       "  '安全': 1,\n",
       "  '噪音偏大': 1,\n",
       "  '转速低': 1,\n",
       "  '差不多': 2,\n",
       "  '强': 4,\n",
       "  '抖': 1,\n",
       "  '响': 1,\n",
       "  '差劲': 1,\n",
       "  '优点': 3,\n",
       "  '轻松': 1,\n",
       "  '刹车盘': 1,\n",
       "  '忍住没改': 1,\n",
       "  '舒适感强': 1,\n",
       "  '操控也好': 1,\n",
       "  '刺拉拉': 1,\n",
       "  '掉漆': 1,\n",
       "  '重新低一些': 1,\n",
       "  '操控真心不错': 1,\n",
       "  '犹如宝马一般': 1,\n",
       "  '比较好': 1,\n",
       "  '优': 1,\n",
       "  '内饰绝对豪华': 1,\n",
       "  '确实很棒': 1,\n",
       "  '没那么松散': 1,\n",
       "  '自信多了': 1,\n",
       "  '操控性好很多': 1,\n",
       "  '告诉抖动': 1,\n",
       "  '相当好': 2,\n",
       "  '操控很满意': 2,\n",
       "  '嘎嘎响': 1,\n",
       "  '操控很棒': 1,\n",
       "  '没有任何问题': 1,\n",
       "  '底盘更高': 1,\n",
       "  '不舒服': 1,\n",
       "  '可以调节': 1,\n",
       "  '没什么影响': 1,\n",
       "  '没有磨损': 1,\n",
       "  '手感不错': 1,\n",
       "  '脱皮': 1,\n",
       "  '兼顾了太多东西': 1,\n",
       "  '底盘最高': 1,\n",
       "  '有变化': 1,\n",
       "  '底盘漏油': 1,\n",
       "  '烂了无所谓': 1,\n",
       "  '上了一个档次': 1,\n",
       "  '视野好': 1,\n",
       "  '底盘厉害': 1,\n",
       "  '走烂路很颠': 1,\n",
       "  '控制不了': 2,\n",
       "  '磨得发亮': 1,\n",
       "  '紧凑': 1,\n",
       "  '看点是操控': 1,\n",
       "  '不费劲': 1,\n",
       "  '还可以': 1,\n",
       "  '宽大舒适': 1,\n",
       "  '比不过': 1,\n",
       "  '差': 3,\n",
       "  '小一点': 2,\n",
       "  '增加': 2,\n",
       "  '低': 2,\n",
       "  '便宜': 2,\n",
       "  '好看，炫酷': 1,\n",
       "  '甩三条街': 1,\n",
       "  '合眼缘': 1,\n",
       "  '没宽敞': 1,\n",
       "  '全': 1,\n",
       "  '噪音依旧': 1,\n",
       "  '性能好': 4,\n",
       "  '没的说': 3,\n",
       "  '坐姿高': 1,\n",
       "  '味道真不小': 1,\n",
       "  '满意': 1,\n",
       "  '驾驶质感强': 1,\n",
       "  '廉价': 1,\n",
       "  '蛮好': 1,\n",
       "  '给力': 1,\n",
       "  '安心': 1,\n",
       "  '欠缺': 1,\n",
       "  '原胎声音我是受不了': 1,\n",
       "  '废刹车': 1,\n",
       "  '质量好': 1,\n",
       "  '完美': 1,\n",
       "  '有': 1,\n",
       "  '通病': 1,\n",
       "  '肯定': 1,\n",
       "  '刹不住': 1,\n",
       "  '随性': 1,\n",
       "  '操控好': 1,\n",
       "  '较好': 1,\n",
       "  '稳': 1,\n",
       "  '破百': 1,\n",
       "  '很一般': 1,\n",
       "  '胜出': 1,\n",
       "  '空间大': 1,\n",
       "  '也不错': 1,\n",
       "  '最好': 1,\n",
       "  '无敌': 1,\n",
       "  '丰富': 1,\n",
       "  '闻名': 1,\n",
       "  '可靠': 2,\n",
       "  '有点性能': 1,\n",
       "  '没得说': 1,\n",
       "  '高一点': 1,\n",
       "  '超过': 1,\n",
       "  '可以的': 1,\n",
       "  '差很多': 1,\n",
       "  '非常好': 2,\n",
       "  '略胜一筹': 1,\n",
       "  '性能高一点': 1,\n",
       "  '好性能': 1,\n",
       "  '理想': 1,\n",
       "  '够可以': 1,\n",
       "  '有提升': 1,\n",
       "  '距离短': 1,\n",
       "  '方向好飘，方向盘总是要不断修正方向': 1,\n",
       "  '车身的反应较迟钝。': 1,\n",
       "  '刹车怎么这么软呢  ': 1,\n",
       "  '比较灵': 1,\n",
       "  '离地间隙低': 1,\n",
       "  '有点糙': 1,\n",
       "  '方向盘比较轻': 1,\n",
       "  '需要一直微调': 1,\n",
       "  '操纵好，底盘高': 1,\n",
       "  '底盘不是很紧凑 ': 1,\n",
       "  '设计要是再简练一些就好了': 1,\n",
       "  '高度只有180mm': 1,\n",
       "  '2.0森市区油耗真心不错，': 1,\n",
       "  '较大后备箱': 1,\n",
       "  '变速箱落后是真的': 1,\n",
       "  '没见过跑14万还有那么好的机器': 1,\n",
       "  '配置高': 1,\n",
       "  '就要这么开': 1,\n",
       "  '飞叉叉': 1,\n",
       "  '方向盘飘': 1,\n",
       "  '基本可以': 1,\n",
       "  '飘': 1,\n",
       "  '摆动': 1,\n",
       "  '着迷': 1,\n",
       "  '别纠结': 1,\n",
       "  '不后悔': 1,\n",
       "  '牛逼': 1,\n",
       "  '不解释': 1,\n",
       "  '挺稳的': 1,\n",
       "  '很危险': 1,\n",
       "  '直径': 1,\n",
       "  '小': 1,\n",
       "  '不贵': 1,\n",
       "  '不足': 1,\n",
       "  '不会高': 1,\n",
       "  '松散': 2,\n",
       "  '声音明显': 1,\n",
       "  '舒适': 1,\n",
       "  '太累': 1,\n",
       "  '拖刹': 1,\n",
       "  '不想管了': 1,\n",
       "  '不抖': 1,\n",
       "  '方向盘出白烟': 1,\n",
       "  '进一步增大': 1,\n",
       "  '其他很low': 1,\n",
       "  '好一点': 1,\n",
       "  '音量调节不好用': 1,\n",
       "  '为了操控': 1,\n",
       "  '操控不错': 1,\n",
       "  '没啥用': 1,\n",
       "  '操控更好': 1,\n",
       "  '较强': 1,\n",
       "  '不跑偏': 1,\n",
       "  '够了': 1,\n",
       "  '何来性能': 1,\n",
       "  '底盘很稳': 1,\n",
       "  '抓地力肯定提高了': 1,\n",
       "  '没办法吐槽': 1,\n",
       "  '操控变差': 1,\n",
       "  '各方面优于森林人': 1,\n",
       "  '前脸倒是2018款的好看': 1,\n",
       "  '秒杀': 1},\n",
       " 9: {'实用': 2,\n",
       "  '没': 1,\n",
       "  '好': 4,\n",
       "  '不实用': 1,\n",
       "  'OK': 1,\n",
       "  '背光': 1,\n",
       "  '不错': 5,\n",
       "  '没有': 1,\n",
       "  '山寨': 1,\n",
       "  '不值钱': 1,\n",
       "  '贵': 1,\n",
       "  '比不上': 1,\n",
       "  '黑': 1,\n",
       "  '垃圾': 7,\n",
       "  '差': 4,\n",
       "  '卡死': 1,\n",
       "  '黑屏': 2,\n",
       "  '惨不忍睹': 1,\n",
       "  '值': 2,\n",
       "  '鸡肋': 3,\n",
       "  '连线': 1,\n",
       "  '简单': 1,\n",
       "  '不太喜欢': 1,\n",
       "  '坑': 3,\n",
       "  '都不错': 1,\n",
       "  '异响': 1,\n",
       "  '后窗': 1,\n",
       "  '国内减配': 1,\n",
       "  '不错不错': 1,\n",
       "  '不行': 2,\n",
       "  '一点问题都没有': 1,\n",
       "  '大 ': 1,\n",
       "  '出色': 1,\n",
       "  '全塑料': 1,\n",
       "  '高': 8,\n",
       "  ' 好看': 1,\n",
       "  '性价比高': 3,\n",
       "  '喜欢': 1,\n",
       "  '改观不少': 1,\n",
       "  '缺失遗憾': 1,\n",
       "  '顶级': 1,\n",
       "  '超前': 1,\n",
       "  '值得': 1,\n",
       "  '低': 4,\n",
       "  '后悔': 1,\n",
       "  '买它的机械性能': 1,\n",
       "  '不能忍': 2,\n",
       "  '打95分': 1,\n",
       "  '不咋样': 2,\n",
       "  '不太重视': 1,\n",
       "  '偏大': 1,\n",
       "  '很好': 1,\n",
       "  '不选': 1,\n",
       "  '丑': 1,\n",
       "  '代价大': 1,\n",
       "  '无所谓': 1,\n",
       "  '不如': 1,\n",
       "  '不是大改款': 1,\n",
       "  '电动尾门太慢': 1,\n",
       "  '承受不起': 1,\n",
       "  '没落锁': 1,\n",
       "  '均衡': 1,\n",
       "  '强': 1,\n",
       "  '省钱': 1,\n",
       "  '没必要': 1,\n",
       "  '有声音': 1,\n",
       "  '有手机导航': 1,\n",
       "  '掉了链子': 1,\n",
       "  '垃圾导航': 1,\n",
       "  '没有导航': 2,\n",
       "  '手机导航方便': 1,\n",
       "  '用导航不少': 1,\n",
       "  '手机导航': 5,\n",
       "  '都一样': 1,\n",
       "  '不带导航': 1,\n",
       "  '车载导航带到沟里': 1,\n",
       "  '硬伤': 2,\n",
       "  '有点提升': 1,\n",
       "  '没优势': 1,\n",
       "  '厚道': 1,\n",
       "  '合眼缘': 1,\n",
       "  '全': 1,\n",
       "  '漂亮': 1,\n",
       "  '不方便': 1,\n",
       "  '太垃圾': 1,\n",
       "  '太扯了': 1,\n",
       "  '满意': 2,\n",
       "  '还可以': 1,\n",
       "  '非常不错': 1,\n",
       "  '比较合理': 1,\n",
       "  '不值得': 1,\n",
       "  '蛮好': 1,\n",
       "  '不值': 1,\n",
       "  '配置简陋': 1,\n",
       "  '很差': 1,\n",
       "  '强太多': 1,\n",
       "  '简配太厉害': 1,\n",
       "  '主导航垃圾': 1,\n",
       "  '坑爹': 1,\n",
       "  '合适': 1,\n",
       "  '胜出': 1,\n",
       "  '提升': 2,\n",
       "  '吐槽': 1,\n",
       "  '丰富': 1,\n",
       "  '一般': 1,\n",
       "  '够了': 1,\n",
       "  '秒杀': 1,\n",
       "  '其他配置真心喜欢': 1,\n",
       "  '喜欢他的机械手刹': 1,\n",
       "  '四驱好能安全一点 ': 1,\n",
       "  '米其林3ST，安静，舒适': 1,\n",
       "  '再贵的车也没有欧蓝德好': 1,\n",
       "  '肯定四驱欧蓝德好    ': 1,\n",
       "  '压倒性的说欧蓝德更好': 1,\n",
       "  '已成绝唱': 1,\n",
       "  '好用': 2,\n",
       "  '有点糙': 1,\n",
       "  '拼凑杀马特': 1,\n",
       "  '机舱里走管子不好走': 1,\n",
       "  '机械上还是很皮实的': 1,\n",
       "  '断轴': 1,\n",
       "  '方向盘比较轻': 1,\n",
       "  '操纵好，底盘高': 1,\n",
       "  '2.0森市区油耗真心不错，': 1,\n",
       "  '最低油耗5.9驾驶体验超级棒': 1,\n",
       "  '油耗低': 1,\n",
       "  '开着各方面都舒服': 1,\n",
       "  '只有优点，缺点几乎为零': 1,\n",
       "  '配置高': 1,\n",
       "  '配置比较低': 1,\n",
       "  '蛮期待 SGP 对新森的影响': 1,\n",
       "  '新款厚重大气': 1,\n",
       "  '好的不是一点': 1,\n",
       "  '音质不行': 1,\n",
       "  '是否可以': 1,\n",
       "  '很良心': 1,\n",
       "  '心里凉凉的': 1,\n",
       "  '早日完蛋': 1,\n",
       "  '不后悔': 1,\n",
       "  '不让人省心': 1,\n",
       "  '没区别': 1,\n",
       "  '齐全': 1,\n",
       "  '不怎么样': 1,\n",
       "  '换': 1,\n",
       "  '大': 1,\n",
       "  '有眼光': 1,\n",
       "  '差不太多': 1,\n",
       "  '不纠结': 1,\n",
       "  '有提高了': 1,\n",
       "  '省心': 1,\n",
       "  '正常': 2,\n",
       "  '方便': 1,\n",
       "  '没用': 1,\n",
       "  '没导航': 1,\n",
       "  '导航很山寨': 1,\n",
       "  '导航坏了': 1,\n",
       "  '导航山寨': 1,\n",
       "  '山寨导航': 1,\n",
       "  ' 低': 1,\n",
       "  '车载导航没意思': 1,\n",
       "  '比欧蓝德的四驱强多了': 1,\n",
       "  '确实是不好用': 1,\n",
       "  '发动机比较low': 1}}"
      ]
     },
     "execution_count": 191,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sent_dict = {i:{} for i in range(10)}\n",
    "for i, w in enumerate(train_df_final['sentiment_word'].values):\n",
    "    if str(w) == 'nan':\n",
    "        continue\n",
    "    for j in range(10):\n",
    "        if train_df_final.iloc[i, j+5] > 0:\n",
    "            sent_dict[j][w] = sent_dict[j].get(w, 0) + 1\n",
    "\n",
    "# train_df_final.iloc[:, 5]\n",
    "sent_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "create embedding matrix\n"
     ]
    }
   ],
   "source": [
    "model_file = '../../data/word2vec_models/sgns.baidubaike.bigram-char'\n",
    "print('create embedding matrix')\n",
    "\n",
    "def get_coefs(word, *arr): \n",
    "    return word, np.asarray(arr, dtype='float32')\n",
    "embeddings_index = dict(get_coefs(*o.rstrip().rsplit(' ')) for o in open(model_file).readlines()[1:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "影响 1\n",
      "高 10\n",
      "低 4\n",
      "有钱任性 1\n",
      "便宜 14\n",
      "死硬 1\n",
      "一般 2\n",
      "贵 12\n",
      "优惠 29\n",
      "不会后悔 1\n",
      "优惠太小 1\n",
      "才优惠 1\n",
      "才 1\n",
      "呵呵 2\n",
      "只优惠 2\n",
      "值 3\n",
      "套路 1\n",
      "好 2\n",
      "都不错 1\n",
      "喜欢 1\n",
      "烧机油 1\n",
      "好看 3\n",
      "坐进去最放松的 1\n",
      "不值 1\n",
      "性价比高 3\n",
      "不错 1\n",
      "优惠小 1\n",
      "可以 1\n",
      "愉快 1\n",
      "高昂 1\n",
      "合理 1\n",
      "下不来 1\n",
      "不会便宜 2\n",
      "不高 3\n",
      "不高  not found\n",
      "不完善 1\n",
      "放弃 1\n",
      "差不多 7\n",
      "价格不够 1\n",
      "不平易近人 1\n",
      "价格老贵了 1\n",
      "犹如宝马一般 1\n",
      "不能比的 1\n",
      "没有保证 1\n",
      "高了好多 1\n",
      "价格贵 2\n",
      "差距不大 1\n",
      "给个更便宜的价格 1\n",
      "贵了 1\n",
      "价格差不多 1\n",
      "价格高 2\n",
      "价格一般 1\n",
      "价格不知道 1\n",
      "价格不错 2\n",
      "价格不合适 1\n",
      "比较低的 1\n",
      "合理就行 1\n",
      "就行  not found\n",
      "不纠结 1\n",
      "行情价 1\n",
      "正常价格 1\n",
      "也就多几百 1\n",
      "几百  not found\n",
      "参差不齐 1\n",
      "贵太多 1\n",
      "贵太多  not found\n",
      "不便宜 4\n",
      "价格还行 1\n",
      "还行 1\n",
      "小贵 2\n",
      "没啥优惠 2\n",
      "合适就买 1\n",
      "挺高 1\n",
      "挺高  not found\n",
      "价差很大 1\n",
      "不菲 2\n",
      "低也不能要 1\n",
      "不要在意价格 1\n",
      "硬伤 1\n",
      "下降 1\n",
      "贵一点 2\n",
      "便宜些 2\n",
      "不贵 5\n",
      "  1\n",
      "厚道 1\n",
      "增加 1\n",
      "奇高 1\n",
      "奇高  not found\n",
      "噪音大 1\n",
      "不好 1\n",
      "比较合理 1\n",
      "最适合 1\n",
      "价格太贵 1\n",
      "价格较高 1\n",
      "便宜很多 1\n",
      "这个价格也超级优惠 1\n",
      "外观奇丑无比 1\n",
      "外观太丑 1\n",
      "最好 1\n",
      "操控满意 1\n",
      "很不错 1\n",
      "哪个贵 1\n",
      "好点 1\n",
      "好点  not found\n",
      "没有优势 1\n",
      "可以的 1\n",
      "也贵 1\n",
      "闻名 1\n",
      "虚高 1\n",
      "高了 1\n",
      "相当不错 1\n",
      "理想 1\n",
      "油耗低 1\n",
      "性价比不算很高 1\n",
      "轴距和空间严重不符 1\n",
      "再贵的车也没有欧蓝德好 1\n",
      "再贵  not found\n",
      "价格好 1\n",
      "好用 1\n",
      "好用  not found\n",
      "四驱无敌 1\n",
      "真心便宜，秒杀对手    1\n",
      "机舱里走管子不好走 1\n",
      "降价还是不要想了 1\n",
      "优惠力度还是可以的  1\n",
      "方向盘比较轻 1\n",
      "较大后备箱 1\n",
      "风噪是大了点 1\n",
      "风噪  not found\n",
      "比森林人强不少 1\n",
      "人强  not found\n",
      "隔音效果一般 1\n",
      "很良心 1\n",
      "找不出第二辆 1\n",
      "第二辆  not found\n",
      "不用考虑了 1\n",
      "不靠谱 1\n",
      "没法比 1\n",
      "咬咬牙的 1\n",
      "还不如 1\n",
      "不合算 1\n",
      "给力 1\n",
      "给力  not found\n",
      "少 1\n",
      "忽悠 1\n",
      "性价比 1\n",
      "差不太多 1\n",
      "差不太多  not found\n",
      "不够 1\n",
      "烦心 1\n",
      "丑 1\n",
      "适中 1\n",
      "贵吧 1\n",
      "空间足够 1\n",
      "也不高 1\n",
      "价格又黑 1\n",
      "操控不错 1\n",
      "略贵 1\n",
      "略贵  not found\n",
      "太贵 1\n",
      "更高 1\n",
      "更恶心 1\n",
      "菜车 1\n",
      "菜车  not found\n",
      "好车 1\n",
      "好车  not found\n",
      "再靠谱点 1\n",
      "没有性价比 1\n",
      "无敌 1\n",
      "舒适度也很高 1\n",
      "有一些优惠，但不会价格大跳水 1\n",
      "大跳水  not found\n",
      "被套路了 1\n",
      "不错，无噪音 1\n",
      "纠结 1\n",
      "普通 1\n",
      "还好 1\n",
      "松垮 1\n",
      "旧 1\n",
      "差 8\n",
      "不改改 1\n",
      "一点点 1\n",
      "一点点  not found\n",
      "不细致 1\n",
      "不错 5\n",
      "破破烂烂的 1\n",
      "烧机油 5\n",
      "坐进去最放松的 1\n",
      "影响驾驶 1\n",
      "好 4\n",
      "不感冒 1\n",
      "无法比 1\n",
      "面包车 1\n",
      "全塑料 1\n",
      "喜欢 7\n",
      "打95分 1\n",
      "不咋样 1\n",
      "认可度高 1\n",
      "皮实耐用 1\n",
      "提升太多 1\n",
      "好看 2\n",
      "电动尾门太慢 1\n",
      "尾门  not found\n",
      "挺炫 1\n",
      "中庸 1\n",
      "改了也不够 1\n",
      "完败 2\n",
      "要求一致 1\n",
      "简单不豪华 1\n",
      "才叫车 1\n",
      "太水 1\n",
      "手感爆炸 1\n",
      "不逊于 1\n",
      "时尚 1\n",
      "大改 2\n",
      "大改  not found\n",
      "强 4\n",
      "没的说 1\n",
      "贵 1\n",
      "省油 1\n",
      "大气 1\n",
      "可以 1\n",
      "出毛病 1\n",
      "用处不大 1\n",
      "脱皮 1\n",
      "记忆 1\n",
      "绝对值 1\n",
      "豪华感强 1\n",
      "感强  not found\n",
      "中上游 1\n",
      "粗糙 2\n",
      "好一些 1\n",
      "内饰豪华 1\n",
      "内饰绝对豪华 1\n",
      "内饰不咋地 1\n",
      "对口味 1\n",
      "可以保持 1\n",
      "内饰做工不提了，心知肚明 1\n",
      "不提  not found\n",
      "内饰也可以 1\n",
      "不如现款 1\n",
      "后排空间小了点 1\n",
      "内饰太老气 1\n",
      "内饰全塑料 1\n",
      "内饰低档 1\n",
      "用料不错 1\n",
      "伤感情 1\n",
      "对内饰要求不高 1\n",
      "不高  not found\n",
      "特别不舒服 1\n",
      "发动机异味 1\n",
      "没用 1\n",
      "一般 2\n",
      "内饰贼嚣张 1\n",
      "齐全就行 1\n",
      "就行  not found\n",
      "内饰改改 1\n",
      "简洁干练 1\n",
      "不满+E+E11:E742 1\n",
      "E11  not found\n",
      "E742  not found\n",
      "硬伤 1\n",
      "差一点 1\n",
      "好看，炫酷 1\n",
      "炫酷  not found\n",
      "令人发指 1\n",
      "有些粗 1\n",
      "全 1\n",
      "高出一个档次 1\n",
      "还可以 2\n",
      "性能好 1\n",
      "带喜感 1\n",
      "味道真不小 1\n",
      "好点 2\n",
      "好点  not found\n",
      "太扯了 1\n",
      "太扯  not found\n",
      "满意 1\n",
      "不够整 1\n",
      "不如 1\n",
      "渣 1\n",
      "没戏 1\n",
      "好不少 1\n",
      "有点难受 1\n",
      "大 1\n",
      "不豪华 1\n",
      "太差 2\n",
      "比较差 1\n",
      "外形好看 1\n",
      "内饰太过简单粗糟    1\n",
      "太过  not found\n",
      "粗糟  not found\n",
      "比森林人舒服多了 1\n",
      "外观奇丑无比 1\n",
      "外观太小气 1\n",
      "外观真心赞 1\n",
      "还不如十万块的国产车 1\n",
      "十万块  not found\n",
      "细 1\n",
      "骚 1\n",
      "扯淡 1\n",
      "低 1\n",
      "廉价 2\n",
      "吐槽 1\n",
      "掉渣 1\n",
      "秒杀 2\n",
      "胜出 1\n",
      "提升 1\n",
      "不咋地 3\n",
      "帅 1\n",
      "制冷快 1\n",
      "新 1\n",
      "简约的很 1\n",
      "不敢恭维 2\n",
      "比较好 1\n",
      "气味呛人 1\n",
      "惨不忍睹 1\n",
      "设计上也是不错 1\n",
      "还过的去 1\n",
      "非常一般 1\n",
      "落后 1\n",
      "一股丰田味 1\n",
      "一股  not found\n",
      "有提升 1\n",
      "不咋地  1\n",
      "后排座椅 格叽格叽响 1\n",
      "   not found\n",
      "格叽格  not found\n",
      "叽响  not found\n",
      "有点糙 1\n",
      "断轴 1\n",
      "方向盘比较轻 1\n",
      "内饰有廉价感 1\n",
      "操纵好，底盘高 1\n",
      "内饰一般 1\n",
      "非常好 1\n",
      "油耗低 1\n",
      "不高 1\n",
      "不高  not found\n",
      "比森林人强不少 1\n",
      "人强  not found\n",
      "空间也大 1\n",
      "不像换代应有的水平 1\n",
      "不像  not found\n",
      "多丑 1\n",
      "多丑  not found\n",
      "吊打 1\n",
      "配置高 1\n",
      "一般般 1\n",
      "一般般  not found\n",
      "更好 1\n",
      "有啥用 1\n",
      "设计感土土的 1\n",
      "居然 1\n",
      "真的不错 1\n",
      "不舒坦 1\n",
      "粗旷 1\n",
      "寒酸 1\n",
      "高档 1\n",
      "很好 1\n",
      "劣质材料 1\n",
      "侧面开裂外 1\n",
      "提高档次 1\n",
      "不是喜欢的 1\n",
      "漂亮 1\n",
      "不如人 1\n",
      "足够 1\n",
      "异响 1\n",
      "涉水 1\n",
      "可调 1\n",
      "内饰不错 1\n",
      "内饰老气 1\n",
      "舒服 1\n",
      " 低 1\n",
      "座位太低 1\n",
      "很赞 1\n",
      "很赞  not found\n",
      "还行 1\n",
      "何来性能 1\n",
      "座椅开裂 1\n",
      "设计感太落后 1\n",
      "感太  not found\n",
      "没办法吐槽 1\n",
      "前脸倒是2018款的好看 1\n",
      "前脸  not found\n",
      "完胜 1\n",
      "简单 1\n",
      "难看 1\n",
      "很棒 1\n",
      "垃圾 1\n",
      "恼火 1\n",
      "省油 6\n",
      "灵敏 1\n",
      "反应慢 1\n",
      "熄火 2\n",
      "肉 3\n",
      "不佳 1\n",
      "不佳  not found\n",
      "舒服 2\n",
      "烧机油 81\n",
      "没有烧机油 1\n",
      "运气好 1\n",
      "不烧机油 4\n",
      "不烧  not found\n",
      "郁闷 1\n",
      "没那么明显 1\n",
      "纠结 2\n",
      "坑 2\n",
      "没有 3\n",
      "慢 1\n",
      "爆震 8\n",
      "很少 1\n",
      "怕 2\n",
      "失望 4\n",
      "没烧 2\n",
      "够用 13\n",
      "没 1\n",
      "耐用 1\n",
      "太吵 1\n",
      "鸡肋 1\n",
      "少 1\n",
      "好 13\n",
      "好像 1\n",
      "足 2\n",
      "没问题 9\n",
      "超车 2\n",
      "担心 3\n",
      "满意 2\n",
      "通病 2\n",
      "轻松 2\n",
      "不错 11\n",
      "不烧 3\n",
      "不烧  not found\n",
      "驾驶 1\n",
      "用不完的劲 1\n",
      "有点烧机油 1\n",
      "有点受不了 1\n",
      "不够 5\n",
      "够 1\n",
      "杠杠的 3\n",
      "烧 2\n",
      "破破烂烂的 1\n",
      "贵 5\n",
      "噪音 3\n",
      "就那么回事 1\n",
      "有问题 1\n",
      "好很很多 1\n",
      "隔音不好 1\n",
      "质量不行 1\n",
      "完全够用 3\n",
      "有一个缸体不工作，严重抖动 1\n",
      "可以 2\n",
      "啥都好 1\n",
      "不能比拟 1\n",
      "爽 3\n",
      "机油不好 1\n",
      "大 3\n",
      "差别没那么大 1\n",
      "颜值差 1\n",
      "颜值  not found\n",
      "全塑料 1\n",
      "有动力 1\n",
      "改观不少 1\n",
      "低 5\n",
      "打95分 1\n",
      "偏大 1\n",
      "不选 1\n",
      "不选  not found\n",
      "高 9\n",
      "差 6\n",
      "代价大 1\n",
      "性价比高 1\n",
      "挺炫 1\n",
      "简单不豪华 1\n",
      "大改 2\n",
      "大改  not found\n",
      "强 3\n",
      "大气 1\n",
      "不完善 1\n",
      "全时四驱 3\n",
      "一般 2\n",
      "升高 1\n",
      "担心不足 1\n",
      "加强 1\n",
      "一般般 2\n",
      "一般般  not found\n",
      "反应敏捷 1\n",
      "比不了 1\n",
      "很够用 1\n",
      "承受不起 1\n",
      "不起  not found\n",
      "下降 5\n",
      "好看 1\n",
      "厉害 1\n",
      "好太多 1\n",
      "好太多  not found\n",
      "还行 2\n",
      "差的不是一点 1\n",
      "差的不是一点点 1\n",
      "一点点  not found\n",
      "足够 3\n",
      "还可以 3\n",
      "高原小霸王 1\n",
      "信心满满 1\n",
      "没落锁 1\n",
      "费油 1\n",
      "费油  not found\n",
      "故障 1\n",
      "异响 5\n",
      "弱 2\n",
      "增加 1\n",
      "渗油 2\n",
      "渗油  not found\n",
      "温度高 1\n",
      "漏油 5\n",
      "设计缺陷 1\n",
      "没情况 1\n",
      "很爽 2\n",
      "机油消耗 1\n",
      "无损耗 1\n",
      "抖动 1\n",
      "省心 2\n",
      "换油 1\n",
      "生锈 1\n",
      "噪音偏大 1\n",
      "伤蓄电池 1\n",
      "重心低 1\n",
      "避免 1\n",
      "没关系 1\n",
      "不漏 1\n",
      "涉水 2\n",
      "锁死 1\n",
      "难点 1\n",
      "查不出 1\n",
      "哗啦声 1\n",
      "哗啦声  not found\n",
      "不敢 1\n",
      "败笔 1\n",
      "缺陷 1\n",
      "转速低 1\n",
      "负担大 1\n",
      "轰轰响 1\n",
      "噪音较大 1\n",
      "用的很好 1\n",
      "均衡 1\n",
      "牛车 1\n",
      "油耗上升 1\n",
      "绝对值 1\n",
      "动力不够 2\n",
      "降低动力 1\n",
      "空间不行 2\n",
      "别纠结动力了 1\n",
      "发动机通红 1\n",
      "轻盈 1\n",
      "压力大 1\n",
      "都行 1\n",
      "刺拉拉 2\n",
      "动力下降 1\n",
      "爆缸 2\n",
      "爆缸  not found\n",
      "很舒适 1\n",
      "不行 5\n",
      "平顺自然 1\n",
      "重新打胶 1\n",
      "打胶  not found\n",
      "高了好多 1\n",
      "空间大动力可以 1\n",
      "优 1\n",
      "正常 3\n",
      "加速缓慢 1\n",
      "声音变小 1\n",
      "内饰豪华 1\n",
      "内饰绝对豪华 1\n",
      "太弱 3\n",
      "内饰不咋地 1\n",
      "不说动力差 1\n",
      "操控性好很多 1\n",
      "没觉动力不够 1\n",
      "动力更好 1\n",
      "加速好点 1\n",
      "好点  not found\n",
      "没事 1\n",
      "一样 1\n",
      "影响散热 1\n",
      "油耗增加 1\n",
      "不解有问题 1\n",
      "动力好点 2\n",
      "好点  not found\n",
      "堪比跑车 1\n",
      "跟不上 1\n",
      "操控很满意 1\n",
      "刹车软 1\n",
      "相当给力 1\n",
      "给力  not found\n",
      "可以忽略 1\n",
      "油耗会上升 1\n",
      "比以前快 1\n",
      "冒烟了 1\n",
      "加速轻轻松松 1\n",
      "弱了点 1\n",
      "优势明显 1\n",
      "可以保持 1\n",
      "没啥区别 1\n",
      "干净 2\n",
      "差不多 5\n",
      "好不到哪去 1\n",
      "油耗好 1\n",
      "噪音有所下降 1\n",
      "发动机温度高 1\n",
      "发动机噪声大 1\n",
      "油耗高 1\n",
      "内饰也可以 1\n",
      "没什么影响 1\n",
      "油耗下降 1\n",
      "兼顾了太多东西 1\n",
      "发动机存在问题 1\n",
      "布局限制 1\n",
      "不会觉得不够用 1\n",
      "动力不足 1\n",
      "适当保养 1\n",
      "空间不小 1\n",
      "疯狂 1\n",
      "一方面 1\n",
      "有点降低 1\n",
      "不如 2\n",
      "已经很好了 1\n",
      "发动机没力 1\n",
      "没力  not found\n",
      "加速抖动 1\n",
      "没感觉 2\n",
      "发动机异味 1\n",
      "动力呵呵 1\n",
      "好不少 1\n",
      "非常猛 1\n",
      "没法比 1\n",
      "发动机一直是弊病 1\n",
      "拔尖 1\n",
      "追不上 1\n",
      "后段不行 1\n",
      "没有问题 1\n",
      "磨损 1\n",
      "不够用 1\n",
      "动力足 1\n",
      "别提了 1\n",
      "急加速不行 1\n",
      "发动机嘶哑 1\n",
      "急加速问题 1\n",
      "差别明显 1\n",
      "宽大舒适 1\n",
      "不足 6\n",
      "减少 1\n",
      "逊色 1\n",
      "比不过 1\n",
      "不满+E+E11:E742 1\n",
      "E11  not found\n",
      "E742  not found\n",
      "噪音减少 1\n",
      "降了 1\n",
      "不如之前 1\n",
      "硬伤 1\n",
      "不咋地 1\n",
      "颇高 1\n",
      "不好 1\n",
      "强些 1\n",
      "强些  not found\n",
      "强劲 3\n",
      "好看，炫酷 1\n",
      "炫酷  not found\n",
      "很差 1\n",
      "噪音大 2\n",
      "捉急 1\n",
      "捉急  not found\n",
      "差些 1\n",
      "差点 1\n",
      "强很多 1\n",
      "漂亮 1\n",
      "声音大 1\n",
      "受不了 1\n",
      "难看 1\n",
      "效果不错 1\n",
      "驾驶质感强 1\n",
      "喜欢 3\n",
      "风险很大 1\n",
      "是好车 1\n",
      "好车  not found\n",
      "外观奇丑无比 1\n",
      "外观太丑 1\n",
      "不擅长 1\n",
      "太好了 1\n",
      "不好受 1\n",
      "不少机油 1\n",
      "老坏 1\n",
      "老坏  not found\n",
      "严重烧机油 1\n",
      "肯定烧 1\n",
      "最好 2\n",
      "偏弱 1\n",
      "偏弱  not found\n",
      "车挺好 1\n",
      "车挺  not found\n",
      "很一般 2\n",
      "别指望 1\n",
      "毛病多 1\n",
      "性能好 2\n",
      "破百 1\n",
      "秒杀 2\n",
      "好一些 1\n",
      "好点 1\n",
      "好点  not found\n",
      "偏磨 1\n",
      "偏磨  not found\n",
      "够了 2\n",
      "胜出 1\n",
      "提升 2\n",
      "也不错 1\n",
      "无敌 1\n",
      "出色 1\n",
      "可靠 2\n",
      "有点性能 1\n",
      "没得说 1\n",
      "不会很快 1\n",
      "超过 1\n",
      "可以的 1\n",
      "更高 1\n",
      "性能高一点 1\n",
      "好性能 1\n",
      "动力牛 1\n",
      "理想 1\n",
      "发动机很新啊 1\n",
      "再贵的车也没有欧蓝德好 1\n",
      "再贵  not found\n",
      "将开启汽车动力的新篇章 1\n",
      "连速腾起步都赶不上 1\n",
      "连速腾  not found\n",
      "离地间隙低 1\n",
      "四驱无敌 1\n",
      "水平对置发动机就是轻啊 1\n",
      "对置  not found\n",
      "动力不行 1\n",
      "动力强多 1\n",
      "强多  not found\n",
      "断轴 1\n",
      "变速箱大众调教得很差 1\n",
      "操纵好，底盘高 1\n",
      "油耗真心棒 1\n",
      "很高 1\n",
      "油耗水平已经很满意 1\n",
      "油耗也不高 1\n",
      "较大后备箱 1\n",
      "发动机噪音确实大 1\n",
      "更硬 1\n",
      "比森林人强不少 1\n",
      "人强  not found\n",
      "维修起来麻烦 1\n",
      "老牛逼。 1\n",
      "高性能发动机 1\n",
      "发动机烧漏机油风险大 1\n",
      "烧漏  not found\n",
      "发动机是好 1\n",
      "变速箱落后是真的 1\n",
      "水平对置发动机 全时四驱是最大卖点 1\n",
      "对置  not found\n",
      "   not found\n",
      "水平对置发动机原来这么稳 1\n",
      "对置  not found\n",
      "没见过跑14万还有那么好的机器 1\n",
      "发动机实在太老 1\n",
      "太老  not found\n",
      "比直列的要轻很多 1\n",
      "车漆软 2\n",
      "车漆真软 1\n",
      "真软  not found\n",
      "空间大 2\n",
      "动力和空间太诱惑了！ 1\n",
      "不为别的，就为了中控台和空间 1\n",
      "空间较大 1\n",
      "水平对置横向占用空间太 1\n",
      "对置  not found\n",
      "配置高 1\n",
      "差很远 1\n",
      "猛 2\n",
      "着迷 1\n",
      "扯淡 1\n",
      "真的不错 1\n",
      "不后悔 2\n",
      "牛逼 1\n",
      "太给力了 1\n",
      "太给力  not found\n",
      "感觉不到 1\n",
      "心理没底 1\n",
      "挺稳的 1\n",
      "挺稳  not found\n",
      "不介意 1\n",
      "算我输 1\n",
      "垃圾 1\n",
      "偶尔 1\n",
      "没烧过 1\n",
      "没烧过机油 1\n",
      "问题多 1\n",
      "很好 1\n",
      "说不清 1\n",
      "无力 4\n",
      "不费力 1\n",
      "有眼光 1\n",
      "不如人 1\n",
      "不贵 1\n",
      "不如别人 1\n",
      "美好 1\n",
      "疲惫 1\n",
      "不忍直视 1\n",
      "足够了 1\n",
      "超车没问题 1\n",
      "轻微损耗 1\n",
      "发愁 1\n",
      "消耗快 1\n",
      "没太多效果 1\n",
      "没太多  not found\n",
      "别动 1\n",
      "嗡嗡响 2\n",
      "麻烦大 1\n",
      "很好用 1\n",
      "成熟 1\n",
      "轻爆声 1\n",
      "爆声  not found\n",
      "进一步增大 1\n",
      "负荷大 1\n",
      "凑乎 1\n",
      "凑乎  not found\n",
      "密封不好渗油 1\n",
      "渗油  not found\n",
      "动力太差 1\n",
      "其他很low 1\n",
      "一脸懵逼 1\n",
      "一脸  not found\n",
      "动力感明显 1\n",
      "也不高 1\n",
      "动力大 1\n",
      "另类 1\n",
      "抖动一会 1\n",
      "动力衰竭 1\n",
      "初段还行 1\n",
      "噪声大 1\n",
      "动力一般 1\n",
      "日常足够 1\n",
      "正常足够用 1\n",
      "没影响 1\n",
      "很给力 1\n",
      "淘汰的发动机 1\n",
      "有点后悔 1\n",
      "偏高 1\n",
      "不太足 1\n",
      "不太足  not found\n",
      "十足 1\n",
      "顺滑 1\n",
      "不给力 1\n",
      "不给力  not found\n",
      "风险极大 1\n",
      "加速不顺畅 1\n",
      "发动机没声音 1\n",
      "噪音大、内饰普通 1\n",
      "空调有问题 1\n",
      "见鬼了 1\n",
      "稳 1\n",
      "顺畅 1\n",
      "找虐 1\n",
      "找虐  not found\n",
      "较强 1\n",
      "不太喜欢 1\n",
      "不太  not found\n",
      "何来性能 1\n",
      "抓地力肯定提高了 1\n",
      "制冷效果差 1\n",
      "没办法吐槽 1\n",
      "有点野性 1\n",
      "各方面优于森林人 1\n",
      "发动机有劲 1\n",
      "比现款更强的，拭目以待吧 1\n",
      "更强  not found\n",
      "发动机比较low 1\n",
      "爱它的水平对置发动机 1\n",
      "对置  not found\n",
      "车漆是真薄 1\n",
      "真薄  not found\n",
      "不用讨论 1\n",
      "在意 1\n",
      "差点 1\n",
      "异响 1\n",
      "变色 1\n",
      "丑 5\n",
      "性价比 1\n",
      "薄 2\n",
      "好看 13\n",
      "逼格高 1\n",
      "逼格  not found\n",
      "不好看 4\n",
      "喜欢 5\n",
      "特别迷恋 1\n",
      "小 1\n",
      "颜值差 1\n",
      "颜值  not found\n",
      "难看 4\n",
      " 好看 1\n",
      "很好 1\n",
      "认可度高 1\n",
      "高 1\n",
      "尊贵 1\n",
      "挺炫 1\n",
      "中庸 1\n",
      "完败 1\n",
      "要求一致 1\n",
      "时尚 1\n",
      "大改 2\n",
      "大改  not found\n",
      "省油 2\n",
      "大气 1\n",
      "下不来 1\n",
      "很够用 1\n",
      "普通 1\n",
      "差 2\n",
      "掰不动 1\n",
      "作用不大 1\n",
      "降低 1\n",
      "纠结 1\n",
      "凶狠的前脸 1\n",
      "高度都一样 1\n",
      "对口味 1\n",
      "不纠结 1\n",
      "看中外观 1\n",
      "   1\n",
      "高配好看 1\n",
      "外观不错 2\n",
      "视野好 1\n",
      "底盘厉害 1\n",
      "硬伤 2\n",
      "不错 3\n",
      "漂亮 4\n",
      "大 1\n",
      "合眼缘 1\n",
      "好 2\n",
      "倾斜大 1\n",
      "里外不一 1\n",
      "摇晃 1\n",
      "平庸 1\n",
      "越丑 1\n",
      "越丑  not found\n",
      "不符合 1\n",
      "不协调 2\n",
      "真难看 2\n",
      "好难看 1\n",
      "外形好看 1\n",
      "上了一个档次了，隐隐有豪华车的感觉 1\n",
      "外观没有丝毫质感！     1\n",
      "外观奇丑无比 1\n",
      "外观太小气 1\n",
      "外观太丑 1\n",
      "外观真心赞 1\n",
      "还不如十万块的国产车 1\n",
      "十万块  not found\n",
      "一次没修过 1\n",
      "一次  not found\n",
      "没修过  not found\n",
      "挫样 1\n",
      "挫样  not found\n",
      "秀气 1\n",
      "接受不了 1\n",
      "廉价 1\n",
      "秒杀 1\n",
      "不在意 1\n",
      "软 2\n",
      "厚 1\n",
      "不敢恭维 1\n",
      "强化 1\n",
      "有点土 1\n",
      "硬朗 1\n",
      "一股丰田味 1\n",
      "一股  not found\n",
      "无所谓 1\n",
      "不咋地  1\n",
      "这么看确实大气点 1\n",
      "无力吐槽 1\n",
      "颜色和光线还可以 1\n",
      "看着真舒服 1\n",
      "觉得19款没有老款好看 1\n",
      "老款  not found\n",
      "很耐看 1\n",
      "大气，中规中矩！ 1\n",
      "外形至少本人觉得新款还可以 1\n",
      "拼凑杀马特 1\n",
      "尾灯好丑 1\n",
      "史上最难看 1\n",
      "低调 1\n",
      "油耗低 1\n",
      "不高 1\n",
      "不高  not found\n",
      "水平对置发动机 全时四驱是最大卖点 1\n",
      "对置  not found\n",
      "   not found\n",
      "比直列的要轻很多 1\n",
      "车漆薄 1\n",
      "日系车漆软 1\n",
      "日系车  not found\n",
      "漆软  not found\n",
      "车漆软 2\n",
      "车漆真软 1\n",
      "真软  not found\n",
      "喜欢这种前脸 1\n",
      "前脸  not found\n",
      "前脸设计看着精致，协调了 1\n",
      "前脸  not found\n",
      "前脸无爱 1\n",
      "前脸  not found\n",
      "无爱  not found\n",
      "镀铬太多 让人尴尬 1\n",
      "   not found\n",
      "前脸更好看 1\n",
      "前脸  not found\n",
      "后排太硬。 1\n",
      "太硬  not found\n",
      "空间也大 1\n",
      "不像换代应有的水平 1\n",
      "不像  not found\n",
      "多丑 1\n",
      "多丑  not found\n",
      "吊打 1\n",
      "配置高 1\n",
      "个人觉得比较好看 1\n",
      "不怎么好看 1\n",
      "帅气~ 1\n",
      "社会社会！ 1\n",
      "低级无下限 1\n",
      "新款厚重大气 1\n",
      "真的不错 1\n",
      "仍觉得好 1\n",
      "仁者见仁 1\n",
      "耐看 1\n",
      "耐脏 1\n",
      "不耐脏 1\n",
      "更好看 1\n",
      "不如别人 1\n",
      "我忍了 1\n",
      "我忍  not found\n",
      "清洗 1\n",
      "没有完美的只有适用的 1\n",
      "大头娃娃 1\n",
      "糊弄 1\n",
      "更有层次感 1\n",
      "失望 1\n",
      "不一致 1\n",
      "更霸气 1\n",
      "不喜欢 1\n",
      "车漆是真薄 1\n",
      "真薄  not found\n",
      "前脸倒是2018款的好看 1\n",
      "前脸  not found\n",
      "完胜 1\n",
      "莫名妖娆尾灯的混合体 1\n",
      "太国产了 1\n",
      "耐用 1\n",
      "软 3\n",
      "不强 1\n",
      "差劲 1\n",
      "不耐磨 1\n",
      "刹不住、软 1\n",
      "心寒 1\n",
      "刹不住 1\n",
      "刹车软，刹车行程距离长 1\n",
      "不行 2\n",
      "不给力 1\n",
      "不给力  not found\n",
      "有问题 1\n",
      "不错 4\n",
      "认可度高 1\n",
      "要求一致 1\n",
      "没落锁 1\n",
      "安全 2\n",
      "第一 1\n",
      "高 2\n",
      "全时四驱 2\n",
      "强 1\n",
      "刹车软 2\n",
      "省油 1\n",
      "甩三条街 1\n",
      "三条  not found\n",
      "便宜 1\n",
      "没宽敞 1\n",
      "超好用 1\n",
      "超好  not found\n",
      "最适合 1\n",
      "恼人 1\n",
      "较好 1\n",
      "有效果 1\n",
      "空间大 1\n",
      "急刹车 1\n",
      "风力强劲 1\n",
      "喜欢他的机械手刹 1\n",
      "刹车怎么这么软呢   1\n",
      "卤素灯明显穿透力更强一些 1\n",
      "更强  not found\n",
      "急加速 1\n",
      "设计要是再简练一些就好了 1\n",
      "高度只有180mm 1\n",
      "180mm  not found\n",
      "油耗真心棒 1\n",
      "最低油耗5.9驾驶体验超级棒 1\n",
      "更硬 1\n",
      "刹车脚感非常细腻，适合微操 1\n",
      "脚感  not found\n",
      "还完刹车线性了，随踩随有挺好    1\n",
      "随有  not found\n",
      "避震我觉得挺好，够硬 1\n",
      "很高 1\n",
      "摆动 1\n",
      "无奈 1\n",
      "不让人省心 1\n",
      "算我输 1\n",
      "不太安全 1\n",
      "不太  not found\n",
      "减配 1\n",
      "减配  not found\n",
      "没质感 1\n",
      "好 1\n",
      "烂 1\n",
      "破 1\n",
      "可靠 1\n",
      "强度 1\n",
      "老化 1\n",
      "高很多 1\n",
      "不费力 1\n",
      "剎不住啊剎不住 1\n",
      "啊剎  not found\n",
      "刹车异响 1\n",
      "喜欢 1\n",
      "结实 1\n",
      "差 1\n",
      "过硬 1\n",
      "很少出现 1\n",
      "很赞 1\n",
      "很赞  not found\n",
      "差很多 1\n",
      "座椅开裂 1\n",
      "没办法吐槽 1\n",
      "有点野性 1\n",
      "都不错    1\n",
      "不爽 1\n",
      "还行 1\n",
      "满意 8\n",
      "省油 20\n",
      "不省油 3\n",
      "略高 1\n",
      "知足 1\n",
      "还得 1\n",
      "不高 3\n",
      "不高  not found\n",
      "低 15\n",
      "没感觉 1\n",
      "差不多 2\n",
      "高 12\n",
      "没增加 1\n",
      "足 1\n",
      "市区 7\n",
      "左右 2\n",
      "不公平 1\n",
      "全行业造假 1\n",
      "真不喜欢 1\n",
      "费油 10\n",
      "费油  not found\n",
      "不错 5\n",
      "够 1\n",
      "大 4\n",
      "颜值差 1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "颜值  not found\n",
      "偏大 1\n",
      "完败 2\n",
      "烧机油 2\n",
      "差 2\n",
      "可以 1\n",
      "没问题 1\n",
      "升高 1\n",
      "噪音大 1\n",
      "噪音小 1\n",
      "增加 5\n",
      "油耗大 2\n",
      "正常 4\n",
      "油耗上升 1\n",
      "无毛病 1\n",
      "没有任何油耗现象 1\n",
      "平顺自然 1\n",
      "也不高 1\n",
      "好一些 1\n",
      "没必要做 1\n",
      "给力 2\n",
      "给力  not found\n",
      "额外惊喜 1\n",
      "哈哈 1\n",
      "油耗增加 2\n",
      "油耗会上升 1\n",
      "没有任何问题 1\n",
      "没啥区别 1\n",
      "油耗好 1\n",
      "油耗高 1\n",
      "内饰也可以 1\n",
      "顶不住 1\n",
      "不错的 1\n",
      "油耗下降 1\n",
      "不会高 2\n",
      "很低 1\n",
      "油耗低一些 1\n",
      "油耗太高 1\n",
      "油耗造假 1\n",
      "增加油耗 1\n",
      "油耗高一个 1\n",
      "没怎么关心 1\n",
      "油耗有上升 1\n",
      "稳定 1\n",
      "油耗哗哗的 1\n",
      "无变化 1\n",
      "比以前多 1\n",
      "还可以 2\n",
      "省油的 1\n",
      "油耗很高 2\n",
      "不可能低 1\n",
      "高了点 1\n",
      "高点 1\n",
      "颇高 1\n",
      "高很多 1\n",
      "特大 1\n",
      "废油 1\n",
      "惊喜 1\n",
      "大不少 1\n",
      "贼高 1\n",
      "效果不错 1\n",
      "不好 1\n",
      "恼人 1\n",
      "相当棒 1\n",
      "非常满意 1\n",
      "油耗蹭蹭的往上涨 1\n",
      "性价比低 1\n",
      "油耗不好控制 1\n",
      "耐用 1\n",
      "油耗没那么高 1\n",
      "太高 1\n",
      "值得拥有 1\n",
      "确实省 1\n",
      "胜出 1\n",
      "稍微高点 1\n",
      "高一点 1\n",
      "大些 1\n",
      "够可以 1\n",
      "油耗低 5\n",
      "这油耗牛啊 1\n",
      "方向盘比较轻 1\n",
      "操纵好，底盘高 1\n",
      "2.0森市区油耗真心不错， 1\n",
      "油耗真心棒 1\n",
      "最低油耗5.9驾驶体验超级棒 1\n",
      "低调 1\n",
      "非常好 1\n",
      "很高 1\n",
      "省油不少  1\n",
      "油耗确实很高 1\n",
      "油耗水平已经很满意 1\n",
      "油耗也不高 1\n",
      "后排太硬。 1\n",
      "太硬  not found\n",
      "一般般 1\n",
      "一般般  not found\n",
      "爱咋滴就咋滴 1\n",
      "不敢恭维 1\n",
      "不着调 1\n",
      "不见得省 1\n",
      "不一定费油 1\n",
      "费油  not found\n",
      "才 1\n",
      "省 1\n",
      "城市 1\n",
      "省油环保 1\n",
      "不高、经济 1\n",
      "不高  not found\n",
      "不低 1\n",
      "不低  not found\n",
      "推荐 1\n",
      "结实 1\n",
      "谁买谁后悔 1\n",
      "油耗奇高 1\n",
      "奇高  not found\n",
      "油耗升了 1\n",
      "油耗不低 1\n",
      "不低  not found\n",
      "油耗会下来 1\n",
      "真高 1\n",
      "特费油 1\n",
      "特费油  not found\n",
      "不靠谱 1\n",
      "较低 1\n",
      "很满意 1\n",
      "不费油 1\n",
      "不费油  not found\n",
      "稍微低 1\n",
      "心真累 1\n",
      "有点儿高 1\n",
      "耍流氓 1\n",
      "神奇无比 1\n",
      "不必在乎 1\n",
      "好些 1\n",
      "满足 1\n",
      "小 12\n",
      "都不错 1\n",
      "大 20\n",
      "硬伤 2\n",
      "真不喜欢 1\n",
      "宽敞 2\n",
      "不好看 1\n",
      "后排挤 1\n",
      "不行 1\n",
      "烧机油 1\n",
      "坐进去最放松的 1\n",
      "钻进去 1\n",
      "好 3\n",
      "不感冒 1\n",
      "大  1\n",
      "别奢望 1\n",
      "出色 1\n",
      "面包车 1\n",
      "省油 1\n",
      "杀手锏 1\n",
      "差别没那么大 1\n",
      "不错 2\n",
      "全塑料 1\n",
      "难看 1\n",
      "有动力 1\n",
      "车体高 1\n",
      "均衡 1\n",
      "差不多 1\n",
      "舒服 4\n",
      "空间不行 3\n",
      "实用 1\n",
      "豪华感强 1\n",
      "感强  not found\n",
      "各方面都好些 1\n",
      "不差 1\n",
      "不差  not found\n",
      "舒服的很 1\n",
      "有保障 1\n",
      "空间大动力可以 1\n",
      "内饰豪华 1\n",
      "内饰绝对豪华 1\n",
      "空间大 6\n",
      "非常好 1\n",
      "空间有点 1\n",
      "不敢恭维 1\n",
      "后排空间小了点 1\n",
      "内饰低档 1\n",
      "空间不小 1\n",
      "特别不舒服 1\n",
      "容量大 1\n",
      "只要是空间小 1\n",
      "油耗低一些 1\n",
      "宽大舒适 1\n",
      "特大 1\n",
      "不满+E+E11:E742 1\n",
      "E11  not found\n",
      "E742  not found\n",
      "噪音减少 1\n",
      "小一点 2\n",
      "低 2\n",
      "差一点 1\n",
      "甩三条街 1\n",
      "三条  not found\n",
      "合眼缘 1\n",
      "没宽敞 1\n",
      "有优势 1\n",
      "差 1\n",
      "更方便 1\n",
      "杠杠的 1\n",
      "挺好的 1\n",
      "喜欢 1\n",
      "还行 1\n",
      "外形好看 1\n",
      "比森林人舒服多了 1\n",
      "外观没有丝毫质感！     1\n",
      "不小 1\n",
      "有异响 2\n",
      "太小 1\n",
      "高点 1\n",
      "胜出 1\n",
      "大了很多 1\n",
      "完败 1\n",
      "漂亮 1\n",
      "抓狂 1\n",
      "不咋地 1\n",
      "理想 1\n",
      "是大的 1\n",
      "大空间 1\n",
      "轴距和空间严重不符 1\n",
      "坐姿高视线更好 1\n",
      "高度只有180mm 1\n",
      "180mm  not found\n",
      "低调 1\n",
      "较大后备箱 1\n",
      "比森林人强不少 1\n",
      "人强  not found\n",
      "空间坐姿都好点 1\n",
      "好点  not found\n",
      "动力和空间太诱惑了！ 1\n",
      "不为别的，就为了中控台和空间 1\n",
      "车内空间蛮大 1\n",
      "蛮大  not found\n",
      "操控很满意 1\n",
      "空间较大 1\n",
      "空间也大 1\n",
      "更好 1\n",
      "小了点 1\n",
      "太小了 1\n",
      "比不了 1\n",
      "异响 1\n",
      "不如 2\n",
      "卖点 1\n",
      "足够 1\n",
      "空间足够 1\n",
      "太挤了 1\n",
      "太挤  not found\n",
      "座位太低 1\n",
      "更霸气 1\n",
      "还小 1\n",
      "舒适度也很高 1\n",
      "各方面优于森林人 1\n",
      "静音 5\n",
      "原厂 1\n",
      "空调 5\n",
      "大 6\n",
      "坑 1\n",
      "异响 25\n",
      "噪音 15\n",
      "噪音大 14\n",
      "小 2\n",
      "好 8\n",
      "颠 1\n",
      "不舒服 3\n",
      "有问题打不开 1\n",
      "乱响 1\n",
      "乱响  not found\n",
      "宽敞 1\n",
      "卡顿 1\n",
      "烧机油 10\n",
      "不咋地 1\n",
      "质量很差 1\n",
      "腰疼 1\n",
      "抖 2\n",
      "气门声音大     1\n",
      "响 18\n",
      "空调很牛逼 1\n",
      "隔音不好 1\n",
      "可以 1\n",
      "忽悠 容易晕车 1\n",
      "   not found\n",
      "喜欢 1\n",
      "唯一不足 1\n",
      "大  1\n",
      "无法比 1\n",
      "颜值差 1\n",
      "颜值  not found\n",
      "改观不少 1\n",
      "看中座椅记忆 1\n",
      "丑 1\n",
      "提升太多 1\n",
      "差 2\n",
      "省油 1\n",
      "没问题 1\n",
      "噪音小 3\n",
      "没落锁 1\n",
      "声音很大 1\n",
      "不是很舒适 1\n",
      "颠簸 1\n",
      "有点影响 1\n",
      "有味 1\n",
      "转速低 1\n",
      "噪音较大 1\n",
      "放弃 1\n",
      "噪音不大 1\n",
      "不行 1\n",
      "较差 1\n",
      "有噪音 2\n",
      "受不了 2\n",
      "味特别大 1\n",
      "恒温 1\n",
      "有声音 1\n",
      "空调不咋样 1\n",
      "冻得 1\n",
      "空间不行 1\n",
      "耳朵不好了 1\n",
      "正常 4\n",
      "动力好点 1\n",
      "好点  not found\n",
      "音响太差 1\n",
      "刹车软 1\n",
      "忽略不计 1\n",
      "可以忽略 1\n",
      "比以前快 1\n",
      "制冷效果好 1\n",
      "掩盖噪音 1\n",
      "油耗好 1\n",
      "噪音有所下降 1\n",
      "开空调滴水 1\n",
      "滴水现象 1\n",
      "一般 1\n",
      "很冷 1\n",
      "不给力 1\n",
      "不给力  not found\n",
      "油耗高 1\n",
      "变冰箱 1\n",
      "确实不好 1\n",
      "油耗下降 1\n",
      "适当保养 1\n",
      "空间不小 1\n",
      "底盘漏油 1\n",
      "很低 1\n",
      "满意 1\n",
      "速度快噪音大 1\n",
      "没用 1\n",
      "无变化 1\n",
      "开空调没劲 1\n",
      "比不过 1\n",
      "噪音减少 1\n",
      "有点提升 1\n",
      "差一点 1\n",
      "低 1\n",
      "声音大 2\n",
      "不错 4\n",
      "噪音依旧 1\n",
      "抖抖 1\n",
      "大不少 1\n",
      "好听 1\n",
      "好用 1\n",
      "好用  not found\n",
      "难受 1\n",
      "不方便 1\n",
      "发出声音 1\n",
      "倾斜大 1\n",
      "好点 1\n",
      "好点  not found\n",
      "还可以 1\n",
      "蛮好 1\n",
      "风噪最大 1\n",
      "风噪  not found\n",
      "比较差 1\n",
      "原胎声音我是受不了 1\n",
      "原胎  not found\n",
      "不严实  1\n",
      "有异响 2\n",
      "给力 3\n",
      "给力  not found\n",
      "没有 1\n",
      "出问题 2\n",
      "偏低 1\n",
      "硬 1\n",
      "不严实 1\n",
      "高 2\n",
      "毛病多 1\n",
      "风噪大 1\n",
      "风噪大  not found\n",
      "可以了 1\n",
      "很好用 3\n",
      "滋滋声 1\n",
      "土腥味 1\n",
      "特别抖 1\n",
      "制冷快 1\n",
      "抖动 1\n",
      "大些 1\n",
      "摩擦音 1\n",
      "气味呛人 1\n",
      "非常好 2\n",
      "风力强劲 1\n",
      "噪音低 1\n",
      "异响比较多 1\n",
      "坐姿高视线更好 1\n",
      "后排座椅 格叽格叽响 1\n",
      "   not found\n",
      "格叽格  not found\n",
      "叽响  not found\n",
      "有点糙 1\n",
      "断轴 1\n",
      "方向盘比较轻 1\n",
      "隔音还不如森 1\n",
      "底盘不是很紧凑  1\n",
      "高度只有180mm 1\n",
      "180mm  not found\n",
      "2.0森市区油耗真心不错， 1\n",
      "最低油耗5.9驾驶体验超级棒 1\n",
      "油耗低 1\n",
      "不高 1\n",
      "不高  not found\n",
      "油耗水平已经很满意 1\n",
      "有金属敲击的声音 1\n",
      "风噪真心大 1\n",
      "风噪  not found\n",
      "风噪是大了点 1\n",
      "风噪  not found\n",
      "发动机噪音确实大 1\n",
      "隔音效果一般 1\n",
      "静音舒适 1\n",
      "空间大 2\n",
      "空间坐姿都好点 1\n",
      "好点  not found\n",
      "动力和空间太诱惑了！ 1\n",
      "操控很满意 1\n",
      "空间较大 1\n",
      "后排太硬。 1\n",
      "太硬  not found\n",
      "颈椎有点累  1\n",
      "X1的座椅小的可怜 1\n",
      "X1  not found\n",
      "一般般 1\n",
      "一般般  not found\n",
      "还行吧 1\n",
      "扯淡 1\n",
      "不介意 1\n",
      "很差 1\n",
      "凉 1\n",
      "质量好 1\n",
      "有一点噪音 1\n",
      "真不错 1\n",
      "有提高了 1\n",
      "不如别人 1\n",
      "有点硬 1\n",
      "风噪音 1\n",
      "不能忍 1\n",
      " 噪音大 1\n",
      "免疫 1\n",
      "空调不行 1\n",
      "噪音稍微好些 1\n",
      "进一步增大 1\n",
      "基本上没噪音了 1\n",
      "不贵 1\n",
      "没办法 2\n",
      "麻烦 1\n",
      "噪声大 1\n",
      "别太在意 1\n",
      "别太  not found\n",
      "舒服 1\n",
      "不小 1\n",
      "奇葩 1\n",
      "非常差 1\n",
      "操控更好 1\n",
      "问题 1\n",
      "坏 2\n",
      "强 1\n",
      "土味 1\n",
      "何来性能 1\n",
      "风噪大很多 1\n",
      "风噪大  not found\n",
      "制冷效果差 1\n",
      "舒适度也很高 1\n",
      "不错，无噪音 1\n",
      "优点安静 1\n",
      "有点儿高 1\n",
      "费劲 1\n",
      "差点 1\n",
      "平衡 3\n",
      "一流 1\n",
      "追求 1\n",
      "良好 1\n",
      "第一 1\n",
      "灵活 2\n",
      "垃圾 1\n",
      "很好 4\n",
      "完胜 1\n",
      "好用 1\n",
      "好用  not found\n",
      "大 4\n",
      "换 1\n",
      "规格 1\n",
      "专业 1\n",
      "漂移 1\n",
      "没敢玩 1\n",
      "没敢  not found\n",
      "不错 12\n",
      "不给力 1\n",
      "不给力  not found\n",
      "好感 1\n",
      "就那么回事 1\n",
      "渗油 1\n",
      "渗油  not found\n",
      "烧机油 1\n",
      "可以 2\n",
      "喜欢 5\n",
      "不如 1\n",
      "无法比 2\n",
      "颜值差 1\n",
      "颜值  not found\n",
      "打95分 1\n",
      "高 4\n",
      "好看 1\n",
      "改了也不够 1\n",
      "完败 1\n",
      "舒服 5\n",
      "要求一致 1\n",
      "大改 1\n",
      "大改  not found\n",
      "下不来 1\n",
      "值得 1\n",
      "不飘 1\n",
      "不飘  not found\n",
      "好 9\n",
      "一般 2\n",
      "比不了 1\n",
      "承受不起 1\n",
      "不起  not found\n",
      "厉害 2\n",
      "灵巧 1\n",
      "没问题 3\n",
      "省油 3\n",
      "异响 2\n",
      "偏硬 1\n",
      "偏硬  not found\n",
      "静音不好 1\n",
      "更硬 1\n",
      "很韧性 1\n",
      "优势 2\n",
      "很稳 1\n",
      "更好 2\n",
      "很准 1\n",
      "很准  not found\n",
      "好点 1\n",
      "好点  not found\n",
      "操控感 2\n",
      "换代慢 1\n",
      "安全 1\n",
      "噪音偏大 1\n",
      "转速低 1\n",
      "差不多 2\n",
      "强 4\n",
      "抖 1\n",
      "响 1\n",
      "差劲 1\n",
      "优点 3\n",
      "轻松 1\n",
      "刹车盘 1\n",
      "忍住没改 1\n",
      "没改  not found\n",
      "舒适感强 1\n",
      "舒适感  not found\n",
      "操控也好 1\n",
      "刺拉拉 1\n",
      "掉漆 1\n",
      "重新低一些 1\n",
      "操控真心不错 1\n",
      "犹如宝马一般 1\n",
      "比较好 1\n",
      "优 1\n",
      "内饰绝对豪华 1\n",
      "确实很棒 1\n",
      "没那么松散 1\n",
      "自信多了 1\n",
      "操控性好很多 1\n",
      "告诉抖动 1\n",
      "相当好 2\n",
      "操控很满意 2\n",
      "嘎嘎响 1\n",
      "嘎嘎响  not found\n",
      "操控很棒 1\n",
      "没有任何问题 1\n",
      "底盘更高 1\n",
      "不舒服 1\n",
      "可以调节 1\n",
      "没什么影响 1\n",
      "没有磨损 1\n",
      "手感不错 1\n",
      "脱皮 1\n",
      "兼顾了太多东西 1\n",
      "底盘最高 1\n",
      "有变化 1\n",
      "底盘漏油 1\n",
      "烂了无所谓 1\n",
      "上了一个档次 1\n",
      "视野好 1\n",
      "底盘厉害 1\n",
      "走烂路很颠 1\n",
      "走烂路  not found\n",
      "控制不了 2\n",
      "磨得发亮 1\n",
      "磨得  not found\n",
      "紧凑 1\n",
      "看点是操控 1\n",
      "不费劲 1\n",
      "还可以 1\n",
      "宽大舒适 1\n",
      "比不过 1\n",
      "差 3\n",
      "小一点 2\n",
      "增加 2\n",
      "低 2\n",
      "便宜 2\n",
      "好看，炫酷 1\n",
      "炫酷  not found\n",
      "甩三条街 1\n",
      "三条  not found\n",
      "合眼缘 1\n",
      "没宽敞 1\n",
      "全 1\n",
      "噪音依旧 1\n",
      "性能好 4\n",
      "没的说 3\n",
      "坐姿高 1\n",
      "味道真不小 1\n",
      "满意 1\n",
      "驾驶质感强 1\n",
      "廉价 1\n",
      "蛮好 1\n",
      "给力 1\n",
      "给力  not found\n",
      "安心 1\n",
      "欠缺 1\n",
      "原胎声音我是受不了 1\n",
      "原胎  not found\n",
      "废刹车 1\n",
      "质量好 1\n",
      "完美 1\n",
      "有 1\n",
      "通病 1\n",
      "肯定 1\n",
      "刹不住 1\n",
      "随性 1\n",
      "操控好 1\n",
      "较好 1\n",
      "稳 1\n",
      "破百 1\n",
      "很一般 1\n",
      "胜出 1\n",
      "空间大 1\n",
      "也不错 1\n",
      "最好 1\n",
      "无敌 1\n",
      "丰富 1\n",
      "闻名 1\n",
      "可靠 2\n",
      "有点性能 1\n",
      "没得说 1\n",
      "高一点 1\n",
      "超过 1\n",
      "可以的 1\n",
      "差很多 1\n",
      "非常好 2\n",
      "略胜一筹 1\n",
      "性能高一点 1\n",
      "好性能 1\n",
      "理想 1\n",
      "够可以 1\n",
      "有提升 1\n",
      "距离短 1\n",
      "方向好飘，方向盘总是要不断修正方向 1\n",
      "车身的反应较迟钝。 1\n",
      "刹车怎么这么软呢   1\n",
      "比较灵 1\n",
      "离地间隙低 1\n",
      "有点糙 1\n",
      "方向盘比较轻 1\n",
      "需要一直微调 1\n",
      "操纵好，底盘高 1\n",
      "底盘不是很紧凑  1\n",
      "设计要是再简练一些就好了 1\n",
      "高度只有180mm 1\n",
      "180mm  not found\n",
      "2.0森市区油耗真心不错， 1\n",
      "较大后备箱 1\n",
      "变速箱落后是真的 1\n",
      "没见过跑14万还有那么好的机器 1\n",
      "配置高 1\n",
      "就要这么开 1\n",
      "飞叉叉 1\n",
      "方向盘飘 1\n",
      "基本可以 1\n",
      "飘 1\n",
      "摆动 1\n",
      "着迷 1\n",
      "别纠结 1\n",
      "不后悔 1\n",
      "牛逼 1\n",
      "不解释 1\n",
      "挺稳的 1\n",
      "挺稳  not found\n",
      "很危险 1\n",
      "直径 1\n",
      "小 1\n",
      "不贵 1\n",
      "不足 1\n",
      "不会高 1\n",
      "松散 2\n",
      "声音明显 1\n",
      "舒适 1\n",
      "太累 1\n",
      "太累  not found\n",
      "拖刹 1\n",
      "拖刹  not found\n",
      "不想管了 1\n",
      "不抖 1\n",
      "不抖  not found\n",
      "方向盘出白烟 1\n",
      "进一步增大 1\n",
      "其他很low 1\n",
      "好一点 1\n",
      "音量调节不好用 1\n",
      "为了操控 1\n",
      "操控不错 1\n",
      "没啥用 1\n",
      "操控更好 1\n",
      "较强 1\n",
      "不跑偏 1\n",
      "够了 1\n",
      "何来性能 1\n",
      "底盘很稳 1\n",
      "抓地力肯定提高了 1\n",
      "没办法吐槽 1\n",
      "操控变差 1\n",
      "各方面优于森林人 1\n",
      "前脸倒是2018款的好看 1\n",
      "前脸  not found\n",
      "秒杀 1\n",
      "实用 2\n",
      "没 1\n",
      "好 4\n",
      "不实用 1\n",
      "OK 1\n",
      "背光 1\n",
      "不错 5\n",
      "没有 1\n",
      "山寨 1\n",
      "不值钱 1\n",
      "贵 1\n",
      "比不上 1\n",
      "黑 1\n",
      "垃圾 7\n",
      "差 4\n",
      "卡死 1\n",
      "卡死  not found\n",
      "黑屏 2\n",
      "惨不忍睹 1\n",
      "值 2\n",
      "鸡肋 3\n",
      "连线 1\n",
      "简单 1\n",
      "不太喜欢 1\n",
      "不太  not found\n",
      "坑 3\n",
      "都不错 1\n",
      "异响 1\n",
      "后窗 1\n",
      "国内减配 1\n",
      "减配  not found\n",
      "不错不错 1\n",
      "不行 2\n",
      "一点问题都没有 1\n",
      "大  1\n",
      "出色 1\n",
      "全塑料 1\n",
      "高 8\n",
      " 好看 1\n",
      "性价比高 3\n",
      "喜欢 1\n",
      "改观不少 1\n",
      "缺失遗憾 1\n",
      "顶级 1\n",
      "超前 1\n",
      "值得 1\n",
      "低 4\n",
      "后悔 1\n",
      "买它的机械性能 1\n",
      "不能忍 2\n",
      "打95分 1\n",
      "不咋样 2\n",
      "不太重视 1\n",
      "不太  not found\n",
      "偏大 1\n",
      "很好 1\n",
      "不选 1\n",
      "不选  not found\n",
      "丑 1\n",
      "代价大 1\n",
      "无所谓 1\n",
      "不如 1\n",
      "不是大改款 1\n",
      "改款  not found\n",
      "电动尾门太慢 1\n",
      "尾门  not found\n",
      "承受不起 1\n",
      "不起  not found\n",
      "没落锁 1\n",
      "均衡 1\n",
      "强 1\n",
      "省钱 1\n",
      "没必要 1\n",
      "有声音 1\n",
      "有手机导航 1\n",
      "掉了链子 1\n",
      "垃圾导航 1\n",
      "没有导航 2\n",
      "手机导航方便 1\n",
      "用导航不少 1\n",
      "手机导航 5\n",
      "都一样 1\n",
      "不带导航 1\n",
      "不带  not found\n",
      "车载导航带到沟里 1\n",
      "硬伤 2\n",
      "有点提升 1\n",
      "没优势 1\n",
      "厚道 1\n",
      "合眼缘 1\n",
      "全 1\n",
      "漂亮 1\n",
      "不方便 1\n",
      "太垃圾 1\n",
      "太扯了 1\n",
      "太扯  not found\n",
      "满意 2\n",
      "还可以 1\n",
      "非常不错 1\n",
      "比较合理 1\n",
      "不值得 1\n",
      "蛮好 1\n",
      "不值 1\n",
      "配置简陋 1\n",
      "很差 1\n",
      "强太多 1\n",
      "强太多  not found\n",
      "简配太厉害 1\n",
      "简配  not found\n",
      "主导航垃圾 1\n",
      "坑爹 1\n",
      "坑爹  not found\n",
      "合适 1\n",
      "胜出 1\n",
      "提升 2\n",
      "吐槽 1\n",
      "丰富 1\n",
      "一般 1\n",
      "够了 1\n",
      "秒杀 1\n",
      "其他配置真心喜欢 1\n",
      "喜欢他的机械手刹 1\n",
      "四驱好能安全一点  1\n",
      "米其林3ST，安静，舒适 1\n",
      "3ST  not found\n",
      "再贵的车也没有欧蓝德好 1\n",
      "再贵  not found\n",
      "肯定四驱欧蓝德好     1\n",
      "压倒性的说欧蓝德更好 1\n",
      "已成绝唱 1\n",
      "好用 2\n",
      "好用  not found\n",
      "有点糙 1\n",
      "拼凑杀马特 1\n",
      "机舱里走管子不好走 1\n",
      "机械上还是很皮实的 1\n",
      "断轴 1\n",
      "方向盘比较轻 1\n",
      "操纵好，底盘高 1\n",
      "2.0森市区油耗真心不错， 1\n",
      "最低油耗5.9驾驶体验超级棒 1\n",
      "油耗低 1\n",
      "开着各方面都舒服 1\n",
      "开着  not found\n",
      "只有优点，缺点几乎为零 1\n",
      "配置高 1\n",
      "配置比较低 1\n",
      "蛮期待 SGP 对新森的影响 1\n",
      "   not found\n",
      "   not found\n",
      "新森  not found\n",
      "新款厚重大气 1\n",
      "好的不是一点 1\n",
      "音质不行 1\n",
      "是否可以 1\n",
      "很良心 1\n",
      "心里凉凉的 1\n",
      "早日完蛋 1\n",
      "不后悔 1\n",
      "不让人省心 1\n",
      "没区别 1\n",
      "齐全 1\n",
      "不怎么样 1\n",
      "换 1\n",
      "大 1\n",
      "有眼光 1\n",
      "差不太多 1\n",
      "差不太多  not found\n",
      "不纠结 1\n",
      "有提高了 1\n",
      "省心 1\n",
      "正常 2\n",
      "方便 1\n",
      "没用 1\n",
      "没导航 1\n",
      "导航很山寨 1\n",
      "导航坏了 1\n",
      "导航山寨 1\n",
      "山寨导航 1\n",
      " 低 1\n",
      "车载导航没意思 1\n",
      "比欧蓝德的四驱强多了 1\n",
      "驱强  not found\n",
      "确实是不好用 1\n",
      "发动机比较low 1\n"
     ]
    }
   ],
   "source": [
    "sent_embedding = np.zeros((10, 300), dtype='float32')\n",
    "for i in range(10):\n",
    "    e = 0.0\n",
    "    ith_dict = sent_dict[i]\n",
    "    for key, value in ith_dict.items():\n",
    "        print(key, value)\n",
    "        for char in jieba.cut(key.strip(), cut_all=False):\n",
    "            if char in embeddings_index:\n",
    "                e += embeddings_index[char] * value / sum(ith_dict.values())\n",
    "            else:\n",
    "                print(char, ' not found')\n",
    "    sent_embedding[i] = e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "metadata": {},
   "outputs": [],
   "source": [
    "pickle.dump(sent_embedding, open('../../data/sentiment_embedding.pkl', 'wb'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.0 查看数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d70e7b339f4641e988ff2ea0fa692185",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=12572), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "86b51917b2994cbc917c1c8447f30991",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=12572), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "jp_df = pd.read_csv('../../data/csvs/round2zh2jp.csv')\n",
    "en_df = pd.read_csv('../../data/csvs/round2zh2en.csv')\n",
    "\n",
    "def cut(cut_df):\n",
    "    chars = []\n",
    "    words = []\n",
    "    for s in tqdm(cut_df['text']):\n",
    "        s = str(s)\n",
    "        chars.append(' '.join(list(s)).strip())\n",
    "        words.append(' '.join(jieba.cut(s, cut_all=False)).strip())\n",
    "    os.makedirs('../../data/csvs', exist_ok=True)\n",
    "    cut_df['word'] = words\n",
    "    cut_df['char'] = chars\n",
    "    return cut_df\n",
    "jp_df = cut(jp_df)\n",
    "en_df = cut(en_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       " 0    8488\n",
       " 1    2048\n",
       "-1    2036\n",
       "Name: sentiment_value, dtype: int64"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df = pd.read_csv('../../data/raw_data/train_2.csv')\n",
    "test_df = pd.read_csv('../../data/raw_data/test_public_2v3.csv')\n",
    "\n",
    "# len(test_df[test_df.apply(lambda x:x['subject'] in x['content'], axis=1)])\n",
    "train_df['sentiment_value'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 635974/635974 [00:35<00:00, 18113.65it/s]\n"
     ]
    },
    {
     "ename": "IndexError",
     "evalue": "list assignment index out of range",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-85-789cb829d4ad>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     20\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtoken\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfreq_dict\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mword2vec\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m         \u001b[0msaved_token\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msaved_token\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtoken\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mword2vec\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtoken\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmakedirs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../../data/qiuqiu/'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexist_ok\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mIndexError\u001b[0m: list assignment index out of range"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "tmp_df = pd.read_csv('../../data/csvs/train_multi.csv')\n",
    "tmp_df[:100]\n",
    "words = list(tmp_df['word'])\n",
    "freq_dict = {}\n",
    "for w in words:\n",
    "    for token in w.split():\n",
    "        freq_dict[token] = freq_dict.get(token, 0) + 1\n",
    "freq_dict = sorted(freq_dict.items(), key=lambda x:x[1], reverse=True)\n",
    "\n",
    "\n",
    "saved_token = []\n",
    "word2vec = {}\n",
    "with open('../../data/word2vec_models/sgns.baidubaike.bigram-char', 'r') as fin:\n",
    "    for l in tqdm(fin.readlines()[1:]):\n",
    "        token = l.strip().split()[0]\n",
    "        array = ' '.join(l.strip().split()[1:])\n",
    "        word2vec[token] = array\n",
    "for token, _ in freq_dict:\n",
    "    if token in word2vec:\n",
    "        saved_token[len(saved_token)] = (token, word2vec[token])\n",
    "\n",
    "os.makedirs('../../data/qiuqiu/', exist_ok=True)\n",
    "with open('../../data/qiuqiu/vocab.txt', 'w') as fout, open('../../data/qiuqiu/embedding.txt', 'w') as f_emb:\n",
    "    fout.write('<S>\\n</S>\\n<UNK>\\n')\n",
    "    f_emb.write('{} {}\\n'.format(len(saved_token), 300))\n",
    "    for k in saved_token:\n",
    "        fout.write(k[0]+'\\n')\n",
    "        f_emd.write(k[0]+' ' + k[1] + '\\n')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "# train_df = train_df.sample(frac=1)\n",
    "# train_df = train_df.reset_index(drop=True)\n",
    "# with open('./words.txt', 'w') as f:\n",
    "#     for c in train_df_final['content'].values:\n",
    "#         f.write(''.join(c.strip().split()) + '\\n')\n",
    "with open('./word_tests.txt', 'w') as f:\n",
    "    for c in test_df['content'].values:\n",
    "        f.write(''.join(c.strip().split()) + '\\n')\n",
    "# with open('../data', 'w') as f:\n",
    "# with open('../../data/sentiment_words_list.pkl', 'w') as f:\n",
    "#     pickle.dump(train_df['sentiment_word'].values, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "sent_word_list = list(train_df_final['sentiment_word'].values)\n",
    "with open('../../data/sentiment_word.txt', 'w') as fw, open('../../data/sentiment_char.txt', 'w') as fc:\n",
    "    for idx, line in enumerate(sent_word_list):\n",
    "        if str(line) != 'nan':\n",
    "            line = line.strip()\n",
    "            fc.write(' '.join(list(line)) + '\\n')\n",
    "            fw.write(' '.join(jieba.cut(line, cut_all=False)) + '\\n')\n",
    "        else:\n",
    "            fc.write('\\n')\n",
    "            fw.write('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>content_id</th>\n",
       "      <th>content</th>\n",
       "      <th>word</th>\n",
       "      <th>char</th>\n",
       "      <th>sentiment_word</th>\n",
       "      <th>subject_价格</th>\n",
       "      <th>subject_内饰</th>\n",
       "      <th>subject_动力</th>\n",
       "      <th>subject_外观</th>\n",
       "      <th>subject_安全性</th>\n",
       "      <th>subject_油耗</th>\n",
       "      <th>subject_空间</th>\n",
       "      <th>subject_舒适性</th>\n",
       "      <th>subject_操控</th>\n",
       "      <th>subject_配置</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>13149</td>\n",
       "      <td>因为森林人即将换代，这套系统没必...</td>\n",
       "      <td>因为 森林 人 即将 换代 ， ...</td>\n",
       "      <td>因 为 森 林 人 即 将 换 ...</td>\n",
       "      <td>影响</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2288</td>\n",
       "      <td>四驱价格貌似挺高的，高的可以看齐...</td>\n",
       "      <td>四驱 价格 貌似 挺 高 的 ，...</td>\n",
       "      <td>四 驱 价 格 貌 似 挺 高 ...</td>\n",
       "      <td>高</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1652</td>\n",
       "      <td>斯柯达要说质量，似乎比大众要好一...</td>\n",
       "      <td>斯柯达 要说 质量 ， 似乎 比...</td>\n",
       "      <td>斯 柯 达 要 说 质 量 ， ...</td>\n",
       "      <td>低</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8865</td>\n",
       "      <td>这玩意都是给有钱任性又不懂车的土...</td>\n",
       "      <td>这 玩意 都 是 给 有钱 任性...</td>\n",
       "      <td>这 玩 意 都 是 给 有 钱 ...</td>\n",
       "      <td>有钱任性</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11784</td>\n",
       "      <td>17价格忒高，估计也就是14-1...</td>\n",
       "      <td>17 价格 忒 高 ， 估计 也...</td>\n",
       "      <td>1 7 价 格 忒 高 ， 估 ...</td>\n",
       "      <td>高</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>14601</td>\n",
       "      <td>我开始就是荣放2.5  森林人2...</td>\n",
       "      <td>我 开始 就是 荣放 2.5  ...</td>\n",
       "      <td>我 开 始 就 是 荣 放 2 ...</td>\n",
       "      <td>便宜</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>15972</td>\n",
       "      <td>唉，这货的价格死硬死硬的，低配版...</td>\n",
       "      <td>唉 ， 这货 的 价格 死硬 死...</td>\n",
       "      <td>唉 ， 这 货 的 价 格 死 ...</td>\n",
       "      <td>死硬</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>17103</td>\n",
       "      <td>价格的话只能说一般般吧，太仓前段...</td>\n",
       "      <td>价格 的话 只能 说 一般般 吧...</td>\n",
       "      <td>价 格 的 话 只 能 说 一 ...</td>\n",
       "      <td>一般</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1986</td>\n",
       "      <td>听过，价格太贵，但一直念念不忘</td>\n",
       "      <td>听过 ， 价格 太贵 ， 但 一...</td>\n",
       "      <td>听 过 ， 价 格 太 贵 ， ...</td>\n",
       "      <td>贵</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>3234</td>\n",
       "      <td>恭喜恭喜，这个优惠不错哦！</td>\n",
       "      <td>恭喜 恭喜 ， 这个 优惠 不错...</td>\n",
       "      <td>恭 喜 恭 喜 ， 这 个 优 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1050</td>\n",
       "      <td>优惠幅度不小了，北京优惠八千</td>\n",
       "      <td>优惠 幅度 不小 了 ， 北京 ...</td>\n",
       "      <td>优 惠 幅 度 不 小 了 ， ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>13315</td>\n",
       "      <td>优惠可以了！购进吧！买了不会后悔...</td>\n",
       "      <td>优惠 可以 了 ！ 购进 吧 ！...</td>\n",
       "      <td>优 惠 可 以 了 ！ 购 进 ...</td>\n",
       "      <td>不会后悔</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>10206</td>\n",
       "      <td>现在 什么价 优惠多少</td>\n",
       "      <td>现在   什么 价   优惠 多少</td>\n",
       "      <td>现 在   什 么 价   优 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>10568</td>\n",
       "      <td>优惠一万一 送贴膜装甲脚垫 铁西庞大</td>\n",
       "      <td>优惠 一 万一   送 贴膜 装...</td>\n",
       "      <td>优 惠 一 万 一   送 贴 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>9259</td>\n",
       "      <td>我也大连的，最近也考虑入手森林人...</td>\n",
       "      <td>我 也 大连 的 ， 最近 也 ...</td>\n",
       "      <td>我 也 大 连 的 ， 最 近 ...</td>\n",
       "      <td>优惠太小</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>14023</td>\n",
       "      <td>山东威海全系这才优惠3000，MD</td>\n",
       "      <td>山东 威海 全系 这 才 优惠 ...</td>\n",
       "      <td>山 东 威 海 全 系 这 才 ...</td>\n",
       "      <td>才优惠</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>9975</td>\n",
       "      <td>下手了，豪导特供，优惠1.6万</td>\n",
       "      <td>下手 了 ， 豪导 特供 ， 优...</td>\n",
       "      <td>下 手 了 ， 豪 导 特 供 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>14314</td>\n",
       "      <td>优惠了8000，什么都不送。</td>\n",
       "      <td>优惠 了 8000 ， 什么 都...</td>\n",
       "      <td>优 惠 了 8 0 0 0 ， ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>450</td>\n",
       "      <td>兄弟2.5豪导特供优惠1.6万可...</td>\n",
       "      <td>兄弟 2.5 豪导 特供 优惠 ...</td>\n",
       "      <td>兄 弟 2 . 5 豪 导 特 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2344</td>\n",
       "      <td>恭喜，优惠多少，我准备明天去提，...</td>\n",
       "      <td>恭喜 ， 优惠 多少 ， 我 准...</td>\n",
       "      <td>恭 喜 ， 优 惠 多 少 ， ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2749</td>\n",
       "      <td>什么车款，多少优惠啊？康桥我只能...</td>\n",
       "      <td>什么 车款 ， 多少 优惠 啊 ...</td>\n",
       "      <td>什 么 车 款 ， 多 少 优 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>6797</td>\n",
       "      <td>优惠1 ，就送行程记录</td>\n",
       "      <td>优惠 1   ， 就 送 行程 记录</td>\n",
       "      <td>优 惠 1   ， 就 送 行 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>15414</td>\n",
       "      <td>平常心吧，我本来想买傲虎，优惠不...</td>\n",
       "      <td>平常心 吧 ， 我 本来 想买傲...</td>\n",
       "      <td>平 常 心 吧 ， 我 本 来 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>3122</td>\n",
       "      <td>哈哈，25.48。优惠够给力</td>\n",
       "      <td>哈哈 ， 25.48 。 优惠 ...</td>\n",
       "      <td>哈 哈 ， 2 5 . 4 8 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>10351</td>\n",
       "      <td>兄弟，我也云南的，你的优惠也挺大...</td>\n",
       "      <td>兄弟 ， 我 也 云南 的 ， ...</td>\n",
       "      <td>兄 弟 ， 我 也 云 南 的 ...</td>\n",
       "      <td>才</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>7366</td>\n",
       "      <td>来湖北，我上月订的2.0蓝色时尚...</td>\n",
       "      <td>来 湖北 ， 我 上 月 订 的...</td>\n",
       "      <td>来 湖 北 ， 我 上 月 订 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>7785</td>\n",
       "      <td>什么都没有，就是现金优惠和给了点...</td>\n",
       "      <td>什么 都 没有 ， 就是 现金 ...</td>\n",
       "      <td>什 么 都 没 有 ， 就 是 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>13774</td>\n",
       "      <td>什么都没有，就是现金优惠和给了点...</td>\n",
       "      <td>什么 都 没有 ， 就是 现金 ...</td>\n",
       "      <td>什 么 都 没 有 ， 就 是 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>14176</td>\n",
       "      <td>原来最低也就优惠15000至16...</td>\n",
       "      <td>原来 最低 也 就 优惠 150...</td>\n",
       "      <td>原 来 最 低 也 就 优 惠 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>1887</td>\n",
       "      <td>我们这里，优惠2000，我呵呵了</td>\n",
       "      <td>我们 这里 ， 优惠 2000 ...</td>\n",
       "      <td>我 们 这 里 ， 优 惠 2 ...</td>\n",
       "      <td>呵呵</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>7406</td>\n",
       "      <td>遵义优惠6000，送十万公里包养</td>\n",
       "      <td>遵义 优惠 6000 ， 送 十...</td>\n",
       "      <td>遵 义 优 惠 6 0 0 0 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>4799</td>\n",
       "      <td>深圳这破地方居然只优惠5K，看大...</td>\n",
       "      <td>深圳 这破 地方 居然 只 优惠...</td>\n",
       "      <td>深 圳 这 破 地 方 居 然 ...</td>\n",
       "      <td>只优惠</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>13784</td>\n",
       "      <td>我只看到优惠20K ，不错。呵呵</td>\n",
       "      <td>我 只 看到 优惠 20K   ...</td>\n",
       "      <td>我 只 看 到 优 惠 2 0 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>4874</td>\n",
       "      <td>本来谈到2.5豪华优惠1万，因为...</td>\n",
       "      <td>本来 谈到 2.5 豪华 优惠 ...</td>\n",
       "      <td>本 来 谈 到 2 . 5 豪 ...</td>\n",
       "      <td>值</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>1800</td>\n",
       "      <td>优惠1万，上5年保险延保两年，不...</td>\n",
       "      <td>优惠 1 万 ， 上 5 年 保...</td>\n",
       "      <td>优 惠 1 万 ， 上 5 年 ...</td>\n",
       "      <td>优惠</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>11525</td>\n",
       "      <td>哦，套路。。。。送的礼包也只有保...</td>\n",
       "      <td>哦 ， 套路 。 。 。 。 送...</td>\n",
       "      <td>哦 ， 套 路 。 。 。 。 ...</td>\n",
       "      <td>套路</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>13603</td>\n",
       "      <td>特装确实不错，配置很实用</td>\n",
       "      <td>特装 确实 不错 ， 配置 很 实用</td>\n",
       "      <td>特 装 确 实 不 错 ， 配 ...</td>\n",
       "      <td>实用</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>9319</td>\n",
       "      <td>没cd，没后视镜记忆。之家配置单...</td>\n",
       "      <td>没 cd ， 没 后视镜 记忆 ...</td>\n",
       "      <td>没 c d ， 没 后 视 镜 ...</td>\n",
       "      <td>没</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>2576</td>\n",
       "      <td>长城配置好！性价比高！</td>\n",
       "      <td>长城 配置 好 ！ 性价比 高 ！</td>\n",
       "      <td>长 城 配 置 好 ！ 性 价 ...</td>\n",
       "      <td>好</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>6168</td>\n",
       "      <td>特装我觉得就是多了几个不实用的配...</td>\n",
       "      <td>特装 我 觉得 就是 多 了 几...</td>\n",
       "      <td>特 装 我 觉 得 就 是 多 ...</td>\n",
       "      <td>不实用</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>471</td>\n",
       "      <td>摩雷的听爱卓和优特声听人声都非常...</td>\n",
       "      <td>摩雷 的 听 爱卓 和 优特 声...</td>\n",
       "      <td>摩 雷 的 听 爱 卓 和 优 ...</td>\n",
       "      <td>OK</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>5845</td>\n",
       "      <td>这个山寨导航屏幕很背光 换任何导...</td>\n",
       "      <td>这个 山寨 导航 屏幕 很 背光...</td>\n",
       "      <td>这 个 山 寨 导 航 屏 幕 ...</td>\n",
       "      <td>背光</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>12116</td>\n",
       "      <td>原车的卡打开  显示所有文件 拷...</td>\n",
       "      <td>原车 的 卡 打开     显示...</td>\n",
       "      <td>原 车 的 卡 打 开     ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>5968</td>\n",
       "      <td>个人感觉 改导航 没什么用 还不...</td>\n",
       "      <td>个人感觉   改 导航   没什...</td>\n",
       "      <td>个 人 感 觉   改 导 航 ...</td>\n",
       "      <td>不错</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>9551</td>\n",
       "      <td>还是没有手机导航更新快  而且还...</td>\n",
       "      <td>还是 没有 手机 导航 更新快 ...</td>\n",
       "      <td>还 是 没 有 手 机 导 航 ...</td>\n",
       "      <td>没有</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>12573</td>\n",
       "      <td>不仅16款，斯巴鲁的导航全是到港...</td>\n",
       "      <td>不仅 16 款 ， 斯巴鲁 的 ...</td>\n",
       "      <td>不 仅 1 6 款 ， 斯 巴 ...</td>\n",
       "      <td>山寨</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>12054</td>\n",
       "      <td>别卖了兄弟，森林人好开不保值。买...</td>\n",
       "      <td>别卖 了 兄弟 ， 森林 人好 ...</td>\n",
       "      <td>别 卖 了 兄 弟 ， 森 林 ...</td>\n",
       "      <td>不值钱</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>1168</td>\n",
       "      <td>XT性价比不高，太贵啦！还是买2...</td>\n",
       "      <td>XT 性价比 不高 ， 太贵 啦...</td>\n",
       "      <td>X T 性 价 比 不 高 ， ...</td>\n",
       "      <td>贵</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>10921</td>\n",
       "      <td>个人意见现车导航比不上手机好用，...</td>\n",
       "      <td>个人 意见 现车 导航 比不上 ...</td>\n",
       "      <td>个 人 意 见 现 车 导 航 ...</td>\n",
       "      <td>比不上</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>6874</td>\n",
       "      <td>我16款开导航一会儿屏幕就黑了</td>\n",
       "      <td>我 16 款开 导航 一会儿 屏...</td>\n",
       "      <td>我 1 6 款 开 导 航 一 ...</td>\n",
       "      <td>黑</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10604</th>\n",
       "      <td>6285</td>\n",
       "      <td>10款以前发动机加95号汽油，1...</td>\n",
       "      <td>10 款 以前 发动机 加 95...</td>\n",
       "      <td>1 0 款 以 前 发 动 机 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10605</th>\n",
       "      <td>16452</td>\n",
       "      <td>我的2.04AT！近期油耗13....</td>\n",
       "      <td>我 的 2.04 AT ！ 近期...</td>\n",
       "      <td>我 的 2 . 0 4 A T ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10606</th>\n",
       "      <td>14205</td>\n",
       "      <td>这台发动机没听说过烧机油啊，我刚...</td>\n",
       "      <td>这台 发动机 没听说过 烧 机油...</td>\n",
       "      <td>这 台 发 动 机 没 听 说 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10607</th>\n",
       "      <td>16176</td>\n",
       "      <td>如果市内短途为主，这个油耗正常。</td>\n",
       "      <td>如果 市内 短途 为主 ， 这个...</td>\n",
       "      <td>如 果 市 内 短 途 为 主 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10608</th>\n",
       "      <td>12129</td>\n",
       "      <td>除啦不能秒H5。秒杀一切城市SU...</td>\n",
       "      <td>除 啦 不能 秒 H5 。 秒杀...</td>\n",
       "      <td>除 啦 不 能 秒 H 5 。 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10609</th>\n",
       "      <td>4152</td>\n",
       "      <td>听说米其林的胎噪音低，我也正在考虑</td>\n",
       "      <td>听说 米其林 的 胎 噪音 低 ...</td>\n",
       "      <td>听 说 米 其 林 的 胎 噪 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10610</th>\n",
       "      <td>4942</td>\n",
       "      <td>我昨天也是试驾了2.5顶配，刹车...</td>\n",
       "      <td>我 昨天 也 是 试驾 了 2....</td>\n",
       "      <td>我 昨 天 也 是 试 驾 了 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10611</th>\n",
       "      <td>674</td>\n",
       "      <td>我就是直接旋转中间的钮 开空调 ...</td>\n",
       "      <td>我 就是 直接 旋转 中间 的 ...</td>\n",
       "      <td>我 就 是 直 接 旋 转 中 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10612</th>\n",
       "      <td>3973</td>\n",
       "      <td>是的，看动力选择和设计取向了，希...</td>\n",
       "      <td>是 的 ， 看 动力 选择 和 ...</td>\n",
       "      <td>是 的 ， 看 动 力 选 择 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10613</th>\n",
       "      <td>13733</td>\n",
       "      <td>嗯嗯，深有同感。我就是看好纯进口...</td>\n",
       "      <td>嗯 嗯 ， 深有同感 。 我 就...</td>\n",
       "      <td>嗯 嗯 ， 深 有 同 感 。 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10614</th>\n",
       "      <td>3109</td>\n",
       "      <td>以前都是拼发动机，现在都是拼变速...</td>\n",
       "      <td>以前 都 是 拼 发动机 ， 现...</td>\n",
       "      <td>以 前 都 是 拼 发 动 机 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10615</th>\n",
       "      <td>9167</td>\n",
       "      <td>哈哈！朋友跟我的想法一样 之前我...</td>\n",
       "      <td>哈哈 ！ 朋友 跟 我 的 想法...</td>\n",
       "      <td>哈 哈 ！ 朋 友 跟 我 的 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10616</th>\n",
       "      <td>5346</td>\n",
       "      <td>我觉得，变速箱机械手刹不用改（我...</td>\n",
       "      <td>我 觉得 ， 变速箱 机械 手刹...</td>\n",
       "      <td>我 觉 得 ， 变 速 箱 机 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10617</th>\n",
       "      <td>207</td>\n",
       "      <td>座椅上方有个上提的拉手，提着拉手...</td>\n",
       "      <td>座椅 上方 有个 上 提 的 拉...</td>\n",
       "      <td>座 椅 上 方 有 个 上 提 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10618</th>\n",
       "      <td>14851</td>\n",
       "      <td>斯巴鲁的中控屏都不是原车的，原车...</td>\n",
       "      <td>斯巴鲁 的 中 控屏 都 不是 ...</td>\n",
       "      <td>斯 巴 鲁 的 中 控 屏 都 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10619</th>\n",
       "      <td>9178</td>\n",
       "      <td>看来这是老款森林人又一个做工上的...</td>\n",
       "      <td>看来 这是 老款 森林 人 又 ...</td>\n",
       "      <td>看 来 这 是 老 款 森 林 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10620</th>\n",
       "      <td>17123</td>\n",
       "      <td>外观不要改太多，硬派些</td>\n",
       "      <td>外观 不要 改太多 ， 硬派 些</td>\n",
       "      <td>外 观 不 要 改 太 多 ， ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10621</th>\n",
       "      <td>14285</td>\n",
       "      <td>转速还行吧 WRX一样的发动机开...</td>\n",
       "      <td>转速 还行 吧   WRX 一样...</td>\n",
       "      <td>转 速 还 行 吧   W R ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10622</th>\n",
       "      <td>3223</td>\n",
       "      <td>路况很差，下很长陡坡的时候，不用...</td>\n",
       "      <td>路况 很差 ， 下 很长 陡坡 ...</td>\n",
       "      <td>路 况 很 差 ， 下 很 长 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10623</th>\n",
       "      <td>2161</td>\n",
       "      <td>不管哪年的产物人家也是Q5底盘规...</td>\n",
       "      <td>不管 哪年 的 产物 人家 也 ...</td>\n",
       "      <td>不 管 哪 年 的 产 物 人 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10624</th>\n",
       "      <td>6542</td>\n",
       "      <td>正解。目前大家的油耗大部分都是0...</td>\n",
       "      <td>正 解 。 目前 大家 的 油耗...</td>\n",
       "      <td>正 解 。 目 前 大 家 的 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10625</th>\n",
       "      <td>10786</td>\n",
       "      <td>2.5的，双排会丢失动力吗？</td>\n",
       "      <td>2.5 的 ， 双排 会 丢失 ...</td>\n",
       "      <td>2 . 5 的 ， 双 排 会 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10626</th>\n",
       "      <td>1556</td>\n",
       "      <td>680你还觉得便宜？只能说这些年...</td>\n",
       "      <td>680 你 还 觉得 便宜 ？ ...</td>\n",
       "      <td>6 8 0 你 还 觉 得 便 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10627</th>\n",
       "      <td>4073</td>\n",
       "      <td>转向助力 空调 发电机 真空泵需...</td>\n",
       "      <td>转向 助力   空调   发电机...</td>\n",
       "      <td>转 向 助 力   空 调   ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10628</th>\n",
       "      <td>12602</td>\n",
       "      <td>美国买的原装导航也是一坨屎</td>\n",
       "      <td>美国 买 的 原装 导航 也 是...</td>\n",
       "      <td>美 国 买 的 原 装 导 航 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10629</th>\n",
       "      <td>3808</td>\n",
       "      <td>我的是16款2.0，也快2万公里...</td>\n",
       "      <td>我 的 是 16 款 2.0 ，...</td>\n",
       "      <td>我 的 是 1 6 款 2 . ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10630</th>\n",
       "      <td>13751</td>\n",
       "      <td>有呀，哈哈。雪地里，方向盘打死，...</td>\n",
       "      <td>有 呀 ， 哈哈 。 雪地 里 ...</td>\n",
       "      <td>有 呀 ， 哈 哈 。 雪 地 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10631</th>\n",
       "      <td>390</td>\n",
       "      <td>可以设置的，是不是碰到中控屏膜了</td>\n",
       "      <td>可以 设置 的 ， 是不是 碰到...</td>\n",
       "      <td>可 以 设 置 的 ， 是 不 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10632</th>\n",
       "      <td>97</td>\n",
       "      <td>cvt这油耗不正常。at也就12撑死</td>\n",
       "      <td>cvt 这 油耗 不 正常 。 ...</td>\n",
       "      <td>c v t 这 油 耗 不 正 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10633</th>\n",
       "      <td>9421</td>\n",
       "      <td>方向盘怎么跟着发动机走？</td>\n",
       "      <td>方向盘 怎么 跟着 发动机 走 ？</td>\n",
       "      <td>方 向 盘 怎 么 跟 着 发 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10634</th>\n",
       "      <td>14368</td>\n",
       "      <td>车子路感，动力如何。我也准备入手</td>\n",
       "      <td>车子 路感 ， 动力 如何 。 ...</td>\n",
       "      <td>车 子 路 感 ， 动 力 如 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10635</th>\n",
       "      <td>12549</td>\n",
       "      <td>只有图四不是……图四是用来体现棕...</td>\n",
       "      <td>只有 图四 不是 … … 图四是...</td>\n",
       "      <td>只 有 图 四 不 是 … … ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10636</th>\n",
       "      <td>3542</td>\n",
       "      <td>碰到屏幕上的摄像头开关，会启动后...</td>\n",
       "      <td>碰到 屏幕 上 的 摄像头 开关...</td>\n",
       "      <td>碰 到 屏 幕 上 的 摄 像 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10637</th>\n",
       "      <td>17002</td>\n",
       "      <td>我问你变速箱链条，你说皮带，反正...</td>\n",
       "      <td>我 问 你 变速箱 链条 ， 你...</td>\n",
       "      <td>我 问 你 变 速 箱 链 条 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10638</th>\n",
       "      <td>15828</td>\n",
       "      <td>动力在100公里内都很好，加速线...</td>\n",
       "      <td>动力 在 100 公里 内 都 ...</td>\n",
       "      <td>动 力 在 1 0 0 公 里 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10639</th>\n",
       "      <td>7105</td>\n",
       "      <td>对一分钱一分货。换位思考人车好好...</td>\n",
       "      <td>对 一分钱 一分货 。 换位 思...</td>\n",
       "      <td>对 一 分 钱 一 分 货 。 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10640</th>\n",
       "      <td>2415</td>\n",
       "      <td>在高架上正常开着车，听着收音机，...</td>\n",
       "      <td>在 高 架上 正常 开着车 ， ...</td>\n",
       "      <td>在 高 架 上 正 常 开 着 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10641</th>\n",
       "      <td>6886</td>\n",
       "      <td>我用的是佳明导航，跑长途的时候挂...</td>\n",
       "      <td>我用 的 是 佳明 导航 ， 跑...</td>\n",
       "      <td>我 用 的 是 佳 明 导 航 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10642</th>\n",
       "      <td>8568</td>\n",
       "      <td>还可以吧，直接链接原车导航的</td>\n",
       "      <td>还 可以 吧 ， 直接 链接 原...</td>\n",
       "      <td>还 可 以 吧 ， 直 接 链 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10643</th>\n",
       "      <td>14103</td>\n",
       "      <td>那是刹车那边的一个塑料套管发出来...</td>\n",
       "      <td>那 是 刹车 那边 的 一个 塑...</td>\n",
       "      <td>那 是 刹 车 那 边 的 一 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10644</th>\n",
       "      <td>7189</td>\n",
       "      <td>水温灯灭了以后。感觉变速箱有保护...</td>\n",
       "      <td>水温 灯灭了 以后 。 感觉 变...</td>\n",
       "      <td>水 温 灯 灭 了 以 后 。 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10645</th>\n",
       "      <td>10992</td>\n",
       "      <td>12款2.5xs  4at， 目...</td>\n",
       "      <td>12 款 2.5 xs     ...</td>\n",
       "      <td>1 2 款 2 . 5 x s ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10646</th>\n",
       "      <td>5657</td>\n",
       "      <td>我的也是斯巴鲁，你别踩刹车，按开...</td>\n",
       "      <td>我 的 也 是 斯巴鲁 ， 你别...</td>\n",
       "      <td>我 的 也 是 斯 巴 鲁 ， ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10647</th>\n",
       "      <td>5756</td>\n",
       "      <td>不打死 方向在正位的时候 快速晃...</td>\n",
       "      <td>不 打死   方向 在 正位 的...</td>\n",
       "      <td>不 打 死   方 向 在 正 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10648</th>\n",
       "      <td>1216</td>\n",
       "      <td>我觉得斯巴鲁的CVT比奥迪的CV...</td>\n",
       "      <td>我 觉得 斯巴鲁 的 CVT 比...</td>\n",
       "      <td>我 觉 得 斯 巴 鲁 的 C ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10649</th>\n",
       "      <td>17392</td>\n",
       "      <td>全时四驱仅比一般SUV车强一点，...</td>\n",
       "      <td>全时 四 驱仅 比 一般 SUV...</td>\n",
       "      <td>全 时 四 驱 仅 比 一 般 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10650</th>\n",
       "      <td>9780</td>\n",
       "      <td>哈哈，终于看到有人开始厌烦前置雷...</td>\n",
       "      <td>哈哈 ， 终于 看到 有人 开始...</td>\n",
       "      <td>哈 哈 ， 终 于 看 到 有 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10651</th>\n",
       "      <td>1079</td>\n",
       "      <td>请教一下，变速箱油，差速器油，火...</td>\n",
       "      <td>请教 一下 ， 变速箱 油 ， ...</td>\n",
       "      <td>请 教 一 下 ， 变 速 箱 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10652</th>\n",
       "      <td>16766</td>\n",
       "      <td>求购二手１４款ＸＴ的后刹车总成。...</td>\n",
       "      <td>求购 二手 １ ４ 款 Ｘ Ｔ ...</td>\n",
       "      <td>求 购 二 手 １ ４ 款 Ｘ ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10653</th>\n",
       "      <td>9651</td>\n",
       "      <td>是的，就是奔全时四驱买的车。</td>\n",
       "      <td>是 的 ， 就是 奔 全时 四驱...</td>\n",
       "      <td>是 的 ， 就 是 奔 全 时 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10654 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      content_id              content                 word  \\\n",
       "0          13149  因为森林人即将换代，这套系统没必...  因为 森林 人 即将 换代 ， ...   \n",
       "1           2288  四驱价格貌似挺高的，高的可以看齐...  四驱 价格 貌似 挺 高 的 ，...   \n",
       "2           1652  斯柯达要说质量，似乎比大众要好一...  斯柯达 要说 质量 ， 似乎 比...   \n",
       "3           8865  这玩意都是给有钱任性又不懂车的土...  这 玩意 都 是 给 有钱 任性...   \n",
       "4          11784  17价格忒高，估计也就是14-1...  17 价格 忒 高 ， 估计 也...   \n",
       "5          14601  我开始就是荣放2.5  森林人2...  我 开始 就是 荣放 2.5  ...   \n",
       "6          15972  唉，这货的价格死硬死硬的，低配版...  唉 ， 这货 的 价格 死硬 死...   \n",
       "7          17103  价格的话只能说一般般吧，太仓前段...  价格 的话 只能 说 一般般 吧...   \n",
       "8           1986  听过，价格太贵，但一直念念不忘      听过 ， 价格 太贵 ， 但 一...   \n",
       "9           3234        恭喜恭喜，这个优惠不错哦！  恭喜 恭喜 ， 这个 优惠 不错...   \n",
       "10          1050       优惠幅度不小了，北京优惠八千  优惠 幅度 不小 了 ， 北京 ...   \n",
       "11         13315  优惠可以了！购进吧！买了不会后悔...  优惠 可以 了 ！ 购进 吧 ！...   \n",
       "12         10206          现在 什么价 优惠多少    现在   什么 价   优惠 多少   \n",
       "13         10568   优惠一万一 送贴膜装甲脚垫 铁西庞大  优惠 一 万一   送 贴膜 装...   \n",
       "14          9259  我也大连的，最近也考虑入手森林人...  我 也 大连 的 ， 最近 也 ...   \n",
       "15         14023    山东威海全系这才优惠3000，MD  山东 威海 全系 这 才 优惠 ...   \n",
       "16          9975      下手了，豪导特供，优惠1.6万  下手 了 ， 豪导 特供 ， 优...   \n",
       "17         14314   优惠了8000，什么都不送。      优惠 了 8000 ， 什么 都...   \n",
       "18           450  兄弟2.5豪导特供优惠1.6万可...  兄弟 2.5 豪导 特供 优惠 ...   \n",
       "19          2344  恭喜，优惠多少，我准备明天去提，...  恭喜 ， 优惠 多少 ， 我 准...   \n",
       "20          2749  什么车款，多少优惠啊？康桥我只能...  什么 车款 ， 多少 优惠 啊 ...   \n",
       "21          6797     优惠1 ，就送行程记录        优惠 1   ， 就 送 行程 记录   \n",
       "22         15414  平常心吧，我本来想买傲虎，优惠不...  平常心 吧 ， 我 本来 想买傲...   \n",
       "23          3122   哈哈，25.48。优惠够给力      哈哈 ， 25.48 。 优惠 ...   \n",
       "24         10351  兄弟，我也云南的，你的优惠也挺大...  兄弟 ， 我 也 云南 的 ， ...   \n",
       "25          7366  来湖北，我上月订的2.0蓝色时尚...  来 湖北 ， 我 上 月 订 的...   \n",
       "26          7785  什么都没有，就是现金优惠和给了点...  什么 都 没有 ， 就是 现金 ...   \n",
       "27         13774  什么都没有，就是现金优惠和给了点...  什么 都 没有 ， 就是 现金 ...   \n",
       "28         14176  原来最低也就优惠15000至16...  原来 最低 也 就 优惠 150...   \n",
       "29          1887     我们这里，优惠2000，我呵呵了  我们 这里 ， 优惠 2000 ...   \n",
       "30          7406     遵义优惠6000，送十万公里包养  遵义 优惠 6000 ， 送 十...   \n",
       "31          4799  深圳这破地方居然只优惠5K，看大...  深圳 这破 地方 居然 只 优惠...   \n",
       "32         13784     我只看到优惠20K ，不错。呵呵  我 只 看到 优惠 20K   ...   \n",
       "33          4874  本来谈到2.5豪华优惠1万，因为...  本来 谈到 2.5 豪华 优惠 ...   \n",
       "34          1800  优惠1万，上5年保险延保两年，不...  优惠 1 万 ， 上 5 年 保...   \n",
       "35         11525  哦，套路。。。。送的礼包也只有保...  哦 ， 套路 。 。 。 。 送...   \n",
       "36         13603         特装确实不错，配置很实用   特装 确实 不错 ， 配置 很 实用   \n",
       "37          9319  没cd，没后视镜记忆。之家配置单...  没 cd ， 没 后视镜 记忆 ...   \n",
       "38          2576          长城配置好！性价比高！    长城 配置 好 ！ 性价比 高 ！   \n",
       "39          6168  特装我觉得就是多了几个不实用的配...  特装 我 觉得 就是 多 了 几...   \n",
       "40           471  摩雷的听爱卓和优特声听人声都非常...  摩雷 的 听 爱卓 和 优特 声...   \n",
       "41          5845  这个山寨导航屏幕很背光 换任何导...  这个 山寨 导航 屏幕 很 背光...   \n",
       "42         12116  原车的卡打开  显示所有文件 拷...  原车 的 卡 打开     显示...   \n",
       "43          5968  个人感觉 改导航 没什么用 还不...  个人感觉   改 导航   没什...   \n",
       "44          9551  还是没有手机导航更新快  而且还...  还是 没有 手机 导航 更新快 ...   \n",
       "45         12573  不仅16款，斯巴鲁的导航全是到港...  不仅 16 款 ， 斯巴鲁 的 ...   \n",
       "46         12054  别卖了兄弟，森林人好开不保值。买...  别卖 了 兄弟 ， 森林 人好 ...   \n",
       "47          1168  XT性价比不高，太贵啦！还是买2...  XT 性价比 不高 ， 太贵 啦...   \n",
       "48         10921  个人意见现车导航比不上手机好用，...  个人 意见 现车 导航 比不上 ...   \n",
       "49          6874      我16款开导航一会儿屏幕就黑了  我 16 款开 导航 一会儿 屏...   \n",
       "...          ...                  ...                  ...   \n",
       "10604       6285  10款以前发动机加95号汽油，1...  10 款 以前 发动机 加 95...   \n",
       "10605      16452  我的2.04AT！近期油耗13....  我 的 2.04 AT ！ 近期...   \n",
       "10606      14205  这台发动机没听说过烧机油啊，我刚...  这台 发动机 没听说过 烧 机油...   \n",
       "10607      16176     如果市内短途为主，这个油耗正常。  如果 市内 短途 为主 ， 这个...   \n",
       "10608      12129  除啦不能秒H5。秒杀一切城市SU...  除 啦 不能 秒 H5 。 秒杀...   \n",
       "10609       4152    听说米其林的胎噪音低，我也正在考虑  听说 米其林 的 胎 噪音 低 ...   \n",
       "10610       4942  我昨天也是试驾了2.5顶配，刹车...  我 昨天 也 是 试驾 了 2....   \n",
       "10611        674  我就是直接旋转中间的钮 开空调 ...  我 就是 直接 旋转 中间 的 ...   \n",
       "10612       3973  是的，看动力选择和设计取向了，希...  是 的 ， 看 动力 选择 和 ...   \n",
       "10613      13733  嗯嗯，深有同感。我就是看好纯进口...  嗯 嗯 ， 深有同感 。 我 就...   \n",
       "10614       3109  以前都是拼发动机，现在都是拼变速...  以前 都 是 拼 发动机 ， 现...   \n",
       "10615       9167  哈哈！朋友跟我的想法一样 之前我...  哈哈 ！ 朋友 跟 我 的 想法...   \n",
       "10616       5346  我觉得，变速箱机械手刹不用改（我...  我 觉得 ， 变速箱 机械 手刹...   \n",
       "10617        207  座椅上方有个上提的拉手，提着拉手...  座椅 上方 有个 上 提 的 拉...   \n",
       "10618      14851  斯巴鲁的中控屏都不是原车的，原车...  斯巴鲁 的 中 控屏 都 不是 ...   \n",
       "10619       9178  看来这是老款森林人又一个做工上的...  看来 这是 老款 森林 人 又 ...   \n",
       "10620      17123          外观不要改太多，硬派些     外观 不要 改太多 ， 硬派 些   \n",
       "10621      14285  转速还行吧 WRX一样的发动机开...  转速 还行 吧   WRX 一样...   \n",
       "10622       3223  路况很差，下很长陡坡的时候，不用...  路况 很差 ， 下 很长 陡坡 ...   \n",
       "10623       2161  不管哪年的产物人家也是Q5底盘规...  不管 哪年 的 产物 人家 也 ...   \n",
       "10624       6542  正解。目前大家的油耗大部分都是0...  正 解 。 目前 大家 的 油耗...   \n",
       "10625      10786       2.5的，双排会丢失动力吗？  2.5 的 ， 双排 会 丢失 ...   \n",
       "10626       1556  680你还觉得便宜？只能说这些年...  680 你 还 觉得 便宜 ？ ...   \n",
       "10627       4073  转向助力 空调 发电机 真空泵需...  转向 助力   空调   发电机...   \n",
       "10628      12602        美国买的原装导航也是一坨屎  美国 买 的 原装 导航 也 是...   \n",
       "10629       3808  我的是16款2.0，也快2万公里...  我 的 是 16 款 2.0 ，...   \n",
       "10630      13751  有呀，哈哈。雪地里，方向盘打死，...  有 呀 ， 哈哈 。 雪地 里 ...   \n",
       "10631        390     可以设置的，是不是碰到中控屏膜了  可以 设置 的 ， 是不是 碰到...   \n",
       "10632         97   cvt这油耗不正常。at也就12撑死  cvt 这 油耗 不 正常 。 ...   \n",
       "10633       9421         方向盘怎么跟着发动机走？    方向盘 怎么 跟着 发动机 走 ？   \n",
       "10634      14368     车子路感，动力如何。我也准备入手  车子 路感 ， 动力 如何 。 ...   \n",
       "10635      12549  只有图四不是……图四是用来体现棕...  只有 图四 不是 … … 图四是...   \n",
       "10636       3542  碰到屏幕上的摄像头开关，会启动后...  碰到 屏幕 上 的 摄像头 开关...   \n",
       "10637      17002  我问你变速箱链条，你说皮带，反正...  我 问 你 变速箱 链条 ， 你...   \n",
       "10638      15828  动力在100公里内都很好，加速线...  动力 在 100 公里 内 都 ...   \n",
       "10639       7105  对一分钱一分货。换位思考人车好好...  对 一分钱 一分货 。 换位 思...   \n",
       "10640       2415  在高架上正常开着车，听着收音机，...  在 高 架上 正常 开着车 ， ...   \n",
       "10641       6886  我用的是佳明导航，跑长途的时候挂...  我用 的 是 佳明 导航 ， 跑...   \n",
       "10642       8568       还可以吧，直接链接原车导航的  还 可以 吧 ， 直接 链接 原...   \n",
       "10643      14103  那是刹车那边的一个塑料套管发出来...  那 是 刹车 那边 的 一个 塑...   \n",
       "10644       7189  水温灯灭了以后。感觉变速箱有保护...  水温 灯灭了 以后 。 感觉 变...   \n",
       "10645      10992  12款2.5xs  4at， 目...  12 款 2.5 xs     ...   \n",
       "10646       5657  我的也是斯巴鲁，你别踩刹车，按开...  我 的 也 是 斯巴鲁 ， 你别...   \n",
       "10647       5756  不打死 方向在正位的时候 快速晃...  不 打死   方向 在 正位 的...   \n",
       "10648       1216  我觉得斯巴鲁的CVT比奥迪的CV...  我 觉得 斯巴鲁 的 CVT 比...   \n",
       "10649      17392  全时四驱仅比一般SUV车强一点，...  全时 四 驱仅 比 一般 SUV...   \n",
       "10650       9780  哈哈，终于看到有人开始厌烦前置雷...  哈哈 ， 终于 看到 有人 开始...   \n",
       "10651       1079  请教一下，变速箱油，差速器油，火...  请教 一下 ， 变速箱 油 ， ...   \n",
       "10652      16766  求购二手１４款ＸＴ的后刹车总成。...  求购 二手 １ ４ 款 Ｘ Ｔ ...   \n",
       "10653       9651       是的，就是奔全时四驱买的车。  是 的 ， 就是 奔 全时 四驱...   \n",
       "\n",
       "                      char sentiment_word subject_价格 subject_内饰 subject_动力  \\\n",
       "0      因 为 森 林 人 即 将 换 ...             影响          2          0          0   \n",
       "1      四 驱 价 格 貌 似 挺 高 ...              高          1          0          0   \n",
       "2      斯 柯 达 要 说 质 量 ， ...              低          3          0          0   \n",
       "3      这 玩 意 都 是 给 有 钱 ...           有钱任性          1          0          0   \n",
       "4      1 7 价 格 忒 高 ， 估 ...              高          1          0          0   \n",
       "5      我 开 始 就 是 荣 放 2 ...             便宜          3          0          0   \n",
       "6      唉 ， 这 货 的 价 格 死 ...             死硬          1          0          0   \n",
       "7      价 格 的 话 只 能 说 一 ...             一般          2          0          0   \n",
       "8      听 过 ， 价 格 太 贵 ， ...              贵          1          0          0   \n",
       "9      恭 喜 恭 喜 ， 这 个 优 ...             优惠          2          0          0   \n",
       "10     优 惠 幅 度 不 小 了 ， ...             优惠          2          0          0   \n",
       "11     优 惠 可 以 了 ！ 购 进 ...           不会后悔          3          0          0   \n",
       "12     现 在   什 么 价   优 ...             优惠          2          0          0   \n",
       "13     优 惠 一 万 一   送 贴 ...             优惠          2          0          0   \n",
       "14     我 也 大 连 的 ， 最 近 ...           优惠太小          1          0          0   \n",
       "15     山 东 威 海 全 系 这 才 ...            才优惠          1          0          0   \n",
       "16     下 手 了 ， 豪 导 特 供 ...             优惠          2          0          0   \n",
       "17     优 惠 了 8 0 0 0 ， ...             优惠          2          0          0   \n",
       "18     兄 弟 2 . 5 豪 导 特 ...             优惠          2          0          0   \n",
       "19     恭 喜 ， 优 惠 多 少 ， ...             优惠          2          0          0   \n",
       "20     什 么 车 款 ， 多 少 优 ...             优惠          2          0          0   \n",
       "21     优 惠 1   ， 就 送 行 ...             优惠          2          0          0   \n",
       "22     平 常 心 吧 ， 我 本 来 ...             优惠          2          0          0   \n",
       "23     哈 哈 ， 2 5 . 4 8 ...             优惠          2          0          0   \n",
       "24     兄 弟 ， 我 也 云 南 的 ...              才          1          0          0   \n",
       "25     来 湖 北 ， 我 上 月 订 ...             优惠          2          0          0   \n",
       "26     什 么 都 没 有 ， 就 是 ...             优惠          2          0          0   \n",
       "27     什 么 都 没 有 ， 就 是 ...             优惠          2          0          0   \n",
       "28     原 来 最 低 也 就 优 惠 ...             优惠          2          0          0   \n",
       "29     我 们 这 里 ， 优 惠 2 ...             呵呵          1          0          0   \n",
       "30     遵 义 优 惠 6 0 0 0 ...             优惠          2          0          0   \n",
       "31     深 圳 这 破 地 方 居 然 ...            只优惠          1          0          0   \n",
       "32     我 只 看 到 优 惠 2 0 ...             优惠          2          0          0   \n",
       "33     本 来 谈 到 2 . 5 豪 ...              值          3          0          0   \n",
       "34     优 惠 1 万 ， 上 5 年 ...             优惠          2          0          0   \n",
       "35     哦 ， 套 路 。 。 。 。 ...             套路          1          0          0   \n",
       "36     特 装 确 实 不 错 ， 配 ...             实用          0          0          0   \n",
       "37     没 c d ， 没 后 视 镜 ...              没          0          0          0   \n",
       "38     长 城 配 置 好 ！ 性 价 ...              好          3          0          0   \n",
       "39     特 装 我 觉 得 就 是 多 ...            不实用          0          0          0   \n",
       "40     摩 雷 的 听 爱 卓 和 优 ...             OK          0          0          0   \n",
       "41     这 个 山 寨 导 航 屏 幕 ...             背光          0          0          0   \n",
       "42     原 车 的 卡 打 开     ...            NaN          0          0          0   \n",
       "43     个 人 感 觉   改 导 航 ...             不错          0          0          0   \n",
       "44     还 是 没 有 手 机 导 航 ...             没有          0          0          0   \n",
       "45     不 仅 1 6 款 ， 斯 巴 ...             山寨          0          0          0   \n",
       "46     别 卖 了 兄 弟 ， 森 林 ...            不值钱          0          0          0   \n",
       "47     X T 性 价 比 不 高 ， ...              贵          0          0          0   \n",
       "48     个 人 意 见 现 车 导 航 ...            比不上          0          0          0   \n",
       "49     我 1 6 款 开 导 航 一 ...              黑          0          0          0   \n",
       "...                    ...            ...        ...        ...        ...   \n",
       "10604  1 0 款 以 前 发 动 机 ...            NaN          0          0          2   \n",
       "10605  我 的 2 . 0 4 A T ...            NaN          0          0          0   \n",
       "10606  这 台 发 动 机 没 听 说 ...            NaN          0          0          2   \n",
       "10607  如 果 市 内 短 途 为 主 ...            NaN          0          0          0   \n",
       "10608  除 啦 不 能 秒 H 5 。 ...            NaN          0          0          2   \n",
       "10609  听 说 米 其 林 的 胎 噪 ...            NaN          0          0          0   \n",
       "10610  我 昨 天 也 是 试 驾 了 ...            NaN          0          0          0   \n",
       "10611  我 就 是 直 接 旋 转 中 ...            NaN          0          0          0   \n",
       "10612  是 的 ， 看 动 力 选 择 ...            NaN          0          0          2   \n",
       "10613  嗯 嗯 ， 深 有 同 感 。 ...            NaN          0          0          0   \n",
       "10614  以 前 都 是 拼 发 动 机 ...            NaN          0          0          2   \n",
       "10615  哈 哈 ！ 朋 友 跟 我 的 ...            NaN          0          0          0   \n",
       "10616  我 觉 得 ， 变 速 箱 机 ...            NaN          0          3          2   \n",
       "10617  座 椅 上 方 有 个 上 提 ...            NaN          0          2          0   \n",
       "10618  斯 巴 鲁 的 中 控 屏 都 ...            NaN          0          0          0   \n",
       "10619  看 来 这 是 老 款 森 林 ...            NaN          0          1          0   \n",
       "10620  外 观 不 要 改 太 多 ， ...            NaN          0          0          0   \n",
       "10621  转 速 还 行 吧   W R ...            NaN          0          0          3   \n",
       "10622  路 况 很 差 ， 下 很 长 ...            NaN          0          0          0   \n",
       "10623  不 管 哪 年 的 产 物 人 ...            NaN          0          0          0   \n",
       "10624  正 解 。 目 前 大 家 的 ...            NaN          0          0          0   \n",
       "10625  2 . 5 的 ， 双 排 会 ...            NaN          0          0          2   \n",
       "10626  6 8 0 你 还 觉 得 便 ...            NaN          1          0          0   \n",
       "10627  转 向 助 力   空 调   ...            NaN          0          0          0   \n",
       "10628  美 国 买 的 原 装 导 航 ...            NaN          0          0          0   \n",
       "10629  我 的 是 1 6 款 2 . ...            NaN          0          0          0   \n",
       "10630  有 呀 ， 哈 哈 。 雪 地 ...            NaN          0          0          0   \n",
       "10631  可 以 设 置 的 ， 是 不 ...            NaN          0          0          0   \n",
       "10632  c v t 这 油 耗 不 正 ...            NaN          0          0          0   \n",
       "10633  方 向 盘 怎 么 跟 着 发 ...            NaN          0          0          2   \n",
       "10634  车 子 路 感 ， 动 力 如 ...            NaN          0          0          2   \n",
       "10635  只 有 图 四 不 是 … … ...            NaN          0          2          0   \n",
       "10636  碰 到 屏 幕 上 的 摄 像 ...            NaN          0          0          0   \n",
       "10637  我 问 你 变 速 箱 链 条 ...            NaN          0          0          2   \n",
       "10638  动 力 在 1 0 0 公 里 ...            NaN          0          0          3   \n",
       "10639  对 一 分 钱 一 分 货 。 ...            NaN          1          0          0   \n",
       "10640  在 高 架 上 正 常 开 着 ...            NaN          0          0          0   \n",
       "10641  我 用 的 是 佳 明 导 航 ...            NaN          0          0          0   \n",
       "10642  还 可 以 吧 ， 直 接 链 ...            NaN          0          0          0   \n",
       "10643  那 是 刹 车 那 边 的 一 ...            NaN          0          0          0   \n",
       "10644  水 温 灯 灭 了 以 后 。 ...            NaN          0          0          2   \n",
       "10645  1 2 款 2 . 5 x s ...            NaN          0          0          0   \n",
       "10646  我 的 也 是 斯 巴 鲁 ， ...            NaN          0          0          0   \n",
       "10647  不 打 死   方 向 在 正 ...            NaN          0          0          0   \n",
       "10648  我 觉 得 斯 巴 鲁 的 C ...            NaN          0          0          3   \n",
       "10649  全 时 四 驱 仅 比 一 般 ...            NaN          0          0          2   \n",
       "10650  哈 哈 ， 终 于 看 到 有 ...            NaN          0          0          0   \n",
       "10651  请 教 一 下 ， 变 速 箱 ...            NaN          0          0          2   \n",
       "10652  求 购 二 手 １ ４ 款 Ｘ ...            NaN          0          0          0   \n",
       "10653  是 的 ， 就 是 奔 全 时 ...            NaN          0          0          0   \n",
       "\n",
       "      subject_外观 subject_安全性 subject_油耗 subject_空间 subject_舒适性 subject_操控  \\\n",
       "0              0           0          0          0           0          0   \n",
       "1              0           0          0          0           0          0   \n",
       "2              0           0          0          0           0          0   \n",
       "3              0           0          0          0           0          0   \n",
       "4              0           0          0          0           0          0   \n",
       "5              0           0          0          0           0          0   \n",
       "6              0           0          0          0           0          0   \n",
       "7              0           0          0          0           0          0   \n",
       "8              0           0          0          0           0          0   \n",
       "9              0           0          0          0           0          0   \n",
       "10             0           0          0          0           0          0   \n",
       "11             0           0          0          0           0          0   \n",
       "12             0           0          0          0           0          0   \n",
       "13             0           0          0          0           0          0   \n",
       "14             0           0          0          0           0          0   \n",
       "15             0           0          0          0           0          0   \n",
       "16             0           0          0          0           0          0   \n",
       "17             0           0          0          0           0          0   \n",
       "18             0           0          0          0           0          0   \n",
       "19             0           0          0          0           0          0   \n",
       "20             0           0          0          0           0          0   \n",
       "21             0           0          0          0           0          0   \n",
       "22             0           0          0          0           0          0   \n",
       "23             0           0          0          0           0          0   \n",
       "24             0           0          0          0           0          0   \n",
       "25             0           0          0          0           0          0   \n",
       "26             0           0          0          0           0          0   \n",
       "27             0           0          0          0           0          0   \n",
       "28             0           0          0          0           0          0   \n",
       "29             0           0          0          0           0          0   \n",
       "30             0           0          0          0           0          0   \n",
       "31             0           0          0          0           0          0   \n",
       "32             0           0          0          0           0          0   \n",
       "33             0           0          0          0           0          0   \n",
       "34             0           0          0          0           0          0   \n",
       "35             0           0          0          0           0          0   \n",
       "36             0           0          0          0           0          0   \n",
       "37             0           0          0          0           0          0   \n",
       "38             0           0          0          0           0          0   \n",
       "39             0           0          0          0           0          0   \n",
       "40             0           0          0          0           0          0   \n",
       "41             0           0          0          0           0          0   \n",
       "42             0           0          0          0           0          0   \n",
       "43             0           0          0          0           0          0   \n",
       "44             0           0          0          0           0          0   \n",
       "45             0           0          0          0           0          0   \n",
       "46             0           0          0          0           0          0   \n",
       "47             0           0          0          0           0          0   \n",
       "48             0           0          0          0           0          0   \n",
       "49             0           0          0          0           0          0   \n",
       "...          ...         ...        ...        ...         ...        ...   \n",
       "10604          0           0          0          0           0          0   \n",
       "10605          0           0          1          0           0          0   \n",
       "10606          0           0          0          0           0          0   \n",
       "10607          0           0          2          0           0          0   \n",
       "10608          0           0          0          0           0          3   \n",
       "10609          0           0          0          0           3          0   \n",
       "10610          0           1          0          0           0          0   \n",
       "10611          0           0          0          0           2          0   \n",
       "10612          0           0          0          0           0          0   \n",
       "10613          0           0          0          0           0          3   \n",
       "10614          0           0          0          0           0          0   \n",
       "10615          0           0          0          0           2          0   \n",
       "10616          0           0          0          0           2          0   \n",
       "10617          0           0          0          0           0          0   \n",
       "10618          0           0          0          0           0          0   \n",
       "10619          0           0          0          0           0          0   \n",
       "10620          1           0          0          0           0          0   \n",
       "10621          0           0          0          0           0          0   \n",
       "10622          0           3          0          0           0          0   \n",
       "10623          0           0          0          0           0          3   \n",
       "10624          0           0          3          0           0          0   \n",
       "10625          0           0          0          0           0          0   \n",
       "10626          0           0          0          0           0          0   \n",
       "10627          0           0          0          0           1          0   \n",
       "10628          0           0          0          0           0          0   \n",
       "10629          0           0          2          0           0          0   \n",
       "10630          0           0          0          0           0          1   \n",
       "10631          0           0          0          0           0          0   \n",
       "10632          0           0          1          0           0          0   \n",
       "10633          0           0          0          0           0          2   \n",
       "10634          0           0          0          0           0          0   \n",
       "10635          0           0          0          0           0          0   \n",
       "10636          0           0          0          0           0          2   \n",
       "10637          0           0          0          0           0          0   \n",
       "10638          0           0          0          0           0          0   \n",
       "10639          0           0          0          0           0          0   \n",
       "10640          0           0          0          0           0          0   \n",
       "10641          0           0          0          0           0          0   \n",
       "10642          0           0          0          0           0          0   \n",
       "10643          0           2          0          0           0          0   \n",
       "10644          0           0          0          0           0          0   \n",
       "10645          0           0          0          0           0          3   \n",
       "10646          0           3          0          0           0          0   \n",
       "10647          0           0          0          0           0          1   \n",
       "10648          0           0          0          0           0          0   \n",
       "10649          0           0          0          0           0          1   \n",
       "10650          0           0          0          0           0          0   \n",
       "10651          0           0          0          0           0          0   \n",
       "10652          0           2          0          0           0          0   \n",
       "10653          0           0          0          0           0          3   \n",
       "\n",
       "      subject_配置  \n",
       "0              0  \n",
       "1              0  \n",
       "2              0  \n",
       "3              0  \n",
       "4              0  \n",
       "5              0  \n",
       "6              0  \n",
       "7              0  \n",
       "8              0  \n",
       "9              0  \n",
       "10             0  \n",
       "11             0  \n",
       "12             0  \n",
       "13             0  \n",
       "14             0  \n",
       "15             0  \n",
       "16             0  \n",
       "17             0  \n",
       "18             0  \n",
       "19             0  \n",
       "20             0  \n",
       "21             0  \n",
       "22             0  \n",
       "23             0  \n",
       "24             0  \n",
       "25             0  \n",
       "26             0  \n",
       "27             0  \n",
       "28             0  \n",
       "29             0  \n",
       "30             0  \n",
       "31             0  \n",
       "32             0  \n",
       "33             0  \n",
       "34             0  \n",
       "35             0  \n",
       "36             3  \n",
       "37             1  \n",
       "38             3  \n",
       "39             1  \n",
       "40             3  \n",
       "41             1  \n",
       "42             2  \n",
       "43             3  \n",
       "44             1  \n",
       "45             1  \n",
       "46             1  \n",
       "47             1  \n",
       "48             1  \n",
       "49             1  \n",
       "...          ...  \n",
       "10604          0  \n",
       "10605          0  \n",
       "10606          0  \n",
       "10607          0  \n",
       "10608          0  \n",
       "10609          0  \n",
       "10610          0  \n",
       "10611          0  \n",
       "10612          0  \n",
       "10613          0  \n",
       "10614          0  \n",
       "10615          0  \n",
       "10616          0  \n",
       "10617          0  \n",
       "10618          1  \n",
       "10619          0  \n",
       "10620          0  \n",
       "10621          0  \n",
       "10622          0  \n",
       "10623          0  \n",
       "10624          0  \n",
       "10625          0  \n",
       "10626          0  \n",
       "10627          0  \n",
       "10628          1  \n",
       "10629          0  \n",
       "10630          0  \n",
       "10631          2  \n",
       "10632          0  \n",
       "10633          0  \n",
       "10634          0  \n",
       "10635          0  \n",
       "10636          0  \n",
       "10637          0  \n",
       "10638          0  \n",
       "10639          0  \n",
       "10640          2  \n",
       "10641          3  \n",
       "10642          2  \n",
       "10643          0  \n",
       "10644          0  \n",
       "10645          0  \n",
       "10646          0  \n",
       "10647          0  \n",
       "10648          0  \n",
       "10649          0  \n",
       "10650          1  \n",
       "10651          0  \n",
       "10652          0  \n",
       "10653          0  \n",
       "\n",
       "[10654 rows x 15 columns]"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Begin with an empty dict bound to sent_dict.\n",
    "sent_dict = dict()\n",
    "\n",
    "# Bare last expression so the notebook renders train_df_final as the cell output.\n",
    "train_df_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vocabulary of sentiment words taken from the training labels.\n",
    "sent_set = set(train_df['sentiment_word'].values)\n",
    "\n",
    "# Keep only real, non-empty strings: this drops NaN labels, and an empty\n",
    "# string would otherwise be contained in every comment. Candidates are ordered\n",
    "# longest-first, then lexicographically, so the chosen word no longer depends\n",
    "# on set iteration order (str hashing is randomized per interpreter start) and\n",
    "# a specific phrase such as '优惠太小' is preferred over its substring '优惠'.\n",
    "sent_candidates = sorted(\n",
    "    (s for s in sent_set if isinstance(s, str) and s and s != 'nan'),\n",
    "    key=lambda s: (-len(s), s),\n",
    ")\n",
    "\n",
    "\n",
    "def find_sentiment_word(text, candidates):\n",
    "    \"\"\"Return the first entry of `candidates` that occurs in `text`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    text : object\n",
    "        The comment to search. Anything that is not a `str` (for example a\n",
    "        NaN standing in for a missing comment) is treated as having no match.\n",
    "    candidates : iterable of str\n",
    "        Sentiment words, already ordered by priority.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        The matched sentiment word, or '' when nothing matches.\n",
    "    \"\"\"\n",
    "    if not isinstance(text, str):\n",
    "        return ''\n",
    "    return next((s for s in candidates if s in text), '')\n",
    "\n",
    "\n",
    "# One entry per test comment, in the same order as test_df['content'].\n",
    "test_sent_list = [\n",
    "    find_sentiment_word(c, sent_candidates) for c in test_df['content'].values\n",
    "]\n",
    "test_df['sentiment_word'] = test_sent_list\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>content_id</th>\n",
       "      <th>content</th>\n",
       "      <th>sentiment_word</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>01htTx8jcsqp3CYr</td>\n",
       "      <td>XV新款低配比以前低配配置方面高...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>03wYj5KyOnLcmbHe</td>\n",
       "      <td>助力跟着发动机走？很奇葩啊！最起...</td>\n",
       "      <td>奇葩</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>05JqRAdHXSsbxWvr</td>\n",
       "      <td>只有报警，如果你想检验一下可以把...</td>\n",
       "      <td>有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0653EkFWJzSYsP7w</td>\n",
       "      <td>不太好判断你描绘的声音，后座安全...</td>\n",
       "      <td>好</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>06H4OMfwaG1xoqFb</td>\n",
       "      <td>前雷达撞了感觉作用不大</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>06iTfdx8yo1OmEZq</td>\n",
       "      <td>这消耗对得住12FB25，我群里...</td>\n",
       "      <td>有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0aKUsYjmWXVMlc1w</td>\n",
       "      <td>召回跟变速箱硬件没关系不是质量问...</td>\n",
       "      <td>问题</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0aNFWEteHpDCxqj2</td>\n",
       "      <td>增强车辆尾部操控，我是防倾杆和小...</td>\n",
       "      <td>强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0aPkxUi8QZB49jAq</td>\n",
       "      <td>2.0的，国庆跑了一个来回高速，...</td>\n",
       "      <td>市区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0AsYm8O6JBuqCh73</td>\n",
       "      <td>跑达喀尔的长城和民用长城有关系吗...</td>\n",
       "      <td>有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0EAkf51SzTLG6ghq</td>\n",
       "      <td>要看你预算是多少了，漫步者也不错...</td>\n",
       "      <td>实惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0elCG69Ygt8Pn4hT</td>\n",
       "      <td>我也想改支持原来方向盘按键吗？多少米</td>\n",
       "      <td>少</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0evmp72SRax4Tquw</td>\n",
       "      <td>车有异响这是所有的车都会有的现象...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>0F8ytidf6PamDRGX</td>\n",
       "      <td>内饰可不敢第一，途观L冠道心里苦啊。</td>\n",
       "      <td>第一</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0futiKXa5Eon9OBI</td>\n",
       "      <td>电瓶头负级拆一下就好了。负级是比...</td>\n",
       "      <td>不抖</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0g58m1hpyZjdksD4</td>\n",
       "      <td>发动机仓里的跟漏油似的！</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>0GaZuph6FqvTHYKg</td>\n",
       "      <td>杭州优惠1.5W左右。</td>\n",
       "      <td>优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0goOzYUThjRQ5B2y</td>\n",
       "      <td>衡量一台车只看内饰太片面了，BY...</td>\n",
       "      <td>喜欢</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0gP1SuYKHVjDUZXN</td>\n",
       "      <td>16年2.5 95#综合由耗7.7</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>0IRxNGSPiKg71oY4</td>\n",
       "      <td>确实只有1万的合理差值，但为了尾...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0iSwPD64tGvsAogc</td>\n",
       "      <td>自己去论坛里找，两例2.0烧机油...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>0IVPXJjFT6mMQvR8</td>\n",
       "      <td>森林人这车就是用020的油。。我...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>0kZfMDNlHSXTwoVb</td>\n",
       "      <td>森林人我感觉空调一开是全功率在吹...</td>\n",
       "      <td>空调</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0L9oFv53GRgAKxZO</td>\n",
       "      <td>安索油的长效指标是最高的，油环少...</td>\n",
       "      <td>有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>0lY1N5m9ForC4sjw</td>\n",
       "      <td>斯巴鲁在国内卖的贵配置还低，主要...</td>\n",
       "      <td>高</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>0nIAjDeH84L1uSkb</td>\n",
       "      <td>谢谢解释，是不是可以简单理解为：...</td>\n",
       "      <td>市区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>0nPKSdI4lmUEv6u3</td>\n",
       "      <td>13款fb20发动机 6速手动 ...</td>\n",
       "      <td>市区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>0oj9xOc1bdiVTDmI</td>\n",
       "      <td>有米必须上XT，加速感受比2.5...</td>\n",
       "      <td>强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>0pGMcY4nAIeV2xqW</td>\n",
       "      <td>估计最早18年10月，做小白鼠也...</td>\n",
       "      <td>足够</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>0PkqaRMTlJwYhgH7</td>\n",
       "      <td>确实是一分价钱一分货！所以在纠结...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>0qKQsSZMWBugPekR</td>\n",
       "      <td>终于有个一样感觉的朋友了，拿着森...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>0RD2OSET4WNFQcYr</td>\n",
       "      <td>明天提车，18款premium ...</td>\n",
       "      <td>差</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>0rdH5sbR7CteoOUW</td>\n",
       "      <td>讲操控还要在山路驾驶上比，高速大...</td>\n",
       "      <td>强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>0SpZ2tHfBg8dLkRc</td>\n",
       "      <td>我也觉得森林人的味道小，两三个礼...</td>\n",
       "      <td>没有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>0SwhJdO4HzX3MF6s</td>\n",
       "      <td>你好哥们，我这车也是xt，三年了...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>0sx7Mr5RtQaT1Obg</td>\n",
       "      <td>音响是建伍品牌，不知真假</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>0T2YuZpFsABCybz7</td>\n",
       "      <td>就我觉得新款外观无比平庸，远远不...</td>\n",
       "      <td>平庸</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>0tVHv4SZnlkuoIwg</td>\n",
       "      <td>变速箱是硬伤，我的2.5加速不够...</td>\n",
       "      <td>不够</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>0U5V2ablhdEfK4GF</td>\n",
       "      <td>18款2.0一开始加的92，跑了...</td>\n",
       "      <td>差</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>0UjRrg1oJKOlsDZy</td>\n",
       "      <td>英镑大跌，英国的经销商完全可以提...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>0uzRerwNi3KJ7L1F</td>\n",
       "      <td>真好，放弃蔚揽，就是觉得舒行的配...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>0veEh8GcN4oBMRO2</td>\n",
       "      <td>途观没开过，SUV简单对比，我开...</td>\n",
       "      <td>厉害</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>0vwSAdLsmoXuOf7U</td>\n",
       "      <td>换新款 CVT加自动启停 市区才...</td>\n",
       "      <td>市区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>0w4F1sHvy7JLnVDl</td>\n",
       "      <td>欧蓝德早就国产了，广汽三菱欧蓝德...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>0wDgjI7Gt1a9iHFm</td>\n",
       "      <td>你的耐用性怎么定义？2000转持...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>0xMzRKSWuwc6QOik</td>\n",
       "      <td>这车不睬刹车一样也能打着，懒得踩...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>0y5iDkRJPxa3BvXw</td>\n",
       "      <td>个人认为模拟档位只是提高驾驶乐趣...</td>\n",
       "      <td>有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>0Z6xEOVphIgLod4X</td>\n",
       "      <td>力狮空间比森林人大很多。。</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>0Zh2ndUCFcuGq5vM</td>\n",
       "      <td>妥妥的，7月份开2.0时尚进藏，...</td>\n",
       "      <td>问题</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>0zLr27HtD5AelUq4</td>\n",
       "      <td>谢谢，我看了下18的内饰确实也很...</td>\n",
       "      <td>很一般</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2703</th>\n",
       "      <td>ZE563haGl2TrmUzK</td>\n",
       "      <td>这个我同意，高配好像是白黑拼色是...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2704</th>\n",
       "      <td>ZF5AeGsPEr3VKv4W</td>\n",
       "      <td>轮毂的J值看到有的人说是可以上2...</td>\n",
       "      <td>问题</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2705</th>\n",
       "      <td>zfepvd2rBiIMOqS1</td>\n",
       "      <td>轮胎动平衡做一个就好，刹车抖换刹车盘</td>\n",
       "      <td>抖</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2706</th>\n",
       "      <td>ZFyWCP6YujnMsabq</td>\n",
       "      <td>听说2019款森林人噪音有改善。</td>\n",
       "      <td>有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2707</th>\n",
       "      <td>Zg1uknB2VEqXa94t</td>\n",
       "      <td>是导航音质都不会好到哪里去，除非...</td>\n",
       "      <td>原厂</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2708</th>\n",
       "      <td>Zi7Vklfocs4dB9Hr</td>\n",
       "      <td>对于车内异味的话，这个天正好开窗...</td>\n",
       "      <td>好开</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2709</th>\n",
       "      <td>zI9S3bKVMRdAoy1h</td>\n",
       "      <td>其实内饰也可以猜到大概什么样，比...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2710</th>\n",
       "      <td>ZIjeybOiWSJA3UGc</td>\n",
       "      <td>好复杂，4S一个师傅说估计是半轴...</td>\n",
       "      <td>问题</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2711</th>\n",
       "      <td>zIxqSwLQAGsVrdn1</td>\n",
       "      <td>XT上sport模式猛踩推背感还...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2712</th>\n",
       "      <td>ZJ3ajfVXhO4rPdoB</td>\n",
       "      <td>没有异响就不是正宗斯巴鲁？</td>\n",
       "      <td>没有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2713</th>\n",
       "      <td>zjemayo25vJDESuH</td>\n",
       "      <td>我的森林人混合动力，机油汽油一起...</td>\n",
       "      <td>问题</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2714</th>\n",
       "      <td>ZkISbOqh6ixQAz7F</td>\n",
       "      <td>个人建议搞个废旧手机整个流量卡插...</td>\n",
       "      <td>旧</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2715</th>\n",
       "      <td>ZkpROl7deciBtQVX</td>\n",
       "      <td>对 我就是因为最近跑上路多 下山...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2716</th>\n",
       "      <td>zkxTYaZmtwLS769u</td>\n",
       "      <td>是的，我开起亚K4的刹车很灵的1...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2717</th>\n",
       "      <td>zL1doOK2Rs4Tu7Sc</td>\n",
       "      <td>你好，我想请教你一下，同样13款...</td>\n",
       "      <td>没有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2718</th>\n",
       "      <td>ZL4GrBCOMT72atl6</td>\n",
       "      <td>谈不上什么真爱。车子只不过是个生...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2719</th>\n",
       "      <td>ZL70Sl1THNmxjPw3</td>\n",
       "      <td>原来是要把镜片外沿那部分打磨啊，...</td>\n",
       "      <td>一点点</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2720</th>\n",
       "      <td>zLcmh0s9f7tuvr2y</td>\n",
       "      <td>我的也是。有人分享过，是加密了。...</td>\n",
       "      <td>有</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2721</th>\n",
       "      <td>ZlfKXOg1c3nUd4qN</td>\n",
       "      <td>XV刹车行程比森短，那就是在刹车...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2722</th>\n",
       "      <td>zm3aPt6bvZuygGj5</td>\n",
       "      <td>新车按说明书推荐的用，不会错。特...</td>\n",
       "      <td>差</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2723</th>\n",
       "      <td>ZN19DMsRqn0f2jLr</td>\n",
       "      <td>不用热车，那个破CVT有热保护，...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2724</th>\n",
       "      <td>ZnDUOSW6GN4aeuCM</td>\n",
       "      <td>这内饰比现在好多了，换代森林人也...</td>\n",
       "      <td>差</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2725</th>\n",
       "      <td>zNOpw2XDJxFkCofv</td>\n",
       "      <td>质保期内可以索赔，换了新的后，使...</td>\n",
       "      <td>可以</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2726</th>\n",
       "      <td>ZnUxmYN7lhCHrsGy</td>\n",
       "      <td>买个静音计划中控降噪音条或者国产的塞。</td>\n",
       "      <td>噪音</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2727</th>\n",
       "      <td>ZOAU96PMEn4wzyRr</td>\n",
       "      <td>这导航设计的人脑袋。。也真是的</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2728</th>\n",
       "      <td>zoEWAY2iHd0phLbS</td>\n",
       "      <td>我还没买呢，就是因为听他说这个价...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2729</th>\n",
       "      <td>zoYdIOEQygqhVDnX</td>\n",
       "      <td>都是这样的，顶棚内饰与前风挡是有...</td>\n",
       "      <td>一点点</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2730</th>\n",
       "      <td>ZPgaN2jw9A4etqWo</td>\n",
       "      <td>倍耐力是不是比较硬 胎噪比较大？...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2731</th>\n",
       "      <td>zq8NBYC3DZwhmgi7</td>\n",
       "      <td>外地买，优惠两万差不多</td>\n",
       "      <td>差</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2732</th>\n",
       "      <td>ZQha9uyENtGiXSlk</td>\n",
       "      <td>晕，粘度高最多费点油，有什么不好...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2733</th>\n",
       "      <td>ZQuHmL51yMjfhdNT</td>\n",
       "      <td>我估计新车定价不会很高，本来这个...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2734</th>\n",
       "      <td>ZrHqi0s6zK4kAGbf</td>\n",
       "      <td>这样把你觉得我比4S贵没问题，我...</td>\n",
       "      <td>问题</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2735</th>\n",
       "      <td>ZRIkFXsrlfqQyBnd</td>\n",
       "      <td>那油耗还可以，我开车大部分时间很温柔</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2736</th>\n",
       "      <td>zrQwD9Vt0qfFACLi</td>\n",
       "      <td>说7.8.9那种油耗的，要么每天...</td>\n",
       "      <td>高</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2737</th>\n",
       "      <td>ZsDzqcSI4W32yEb6</td>\n",
       "      <td>傲虎舒适度好，森舒适度差点。还有...</td>\n",
       "      <td>差点</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2738</th>\n",
       "      <td>zSfR6q8UkdY1NrCj</td>\n",
       "      <td>业内人士都知道，国内的原材料加工...</td>\n",
       "      <td>高</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2739</th>\n",
       "      <td>ZsyGQ2DXnLgWKClO</td>\n",
       "      <td>到店了？坐标哪里？深圳的说还没有...</td>\n",
       "      <td>旧</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2740</th>\n",
       "      <td>ZsYPd6yQ95V1xWj8</td>\n",
       "      <td>37万，希望我也能开到！一年3万...</td>\n",
       "      <td>差</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2741</th>\n",
       "      <td>ZUcv0FYIMAVkOdjz</td>\n",
       "      <td>这尼玛不是你风格啊，是个男人带把...</td>\n",
       "      <td>全时四驱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2742</th>\n",
       "      <td>zUhjVLOYFpIsDkxl</td>\n",
       "      <td>发动机自动启停对发动机没多少伤害...</td>\n",
       "      <td>省</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2743</th>\n",
       "      <td>ZuPb8RNdroEIavy9</td>\n",
       "      <td>哎呀，别和经销商赌气，现阶段买x...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2744</th>\n",
       "      <td>Zv51Qg8l4oG7XWNK</td>\n",
       "      <td>我跑3500公里长途，有一点消耗...</td>\n",
       "      <td>市区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2745</th>\n",
       "      <td>Zv5DU4JOuFnpc6MT</td>\n",
       "      <td>最多10秒 现在的发动机无需热车</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2746</th>\n",
       "      <td>ZVsWUvKBeTg7iGDo</td>\n",
       "      <td>2.0对油耗不是特别在意，开的不...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2747</th>\n",
       "      <td>zWLKO0VFxmvkS6RQ</td>\n",
       "      <td>我只黑庞大后换的那个垃圾大屏。外...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2748</th>\n",
       "      <td>ZX3jmBtvdNYzpOiq</td>\n",
       "      <td>全系标配es会不会价格上去</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2749</th>\n",
       "      <td>ZXKbJ5SifwD6pdmu</td>\n",
       "      <td>用ephone刹车皮3个月</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2750</th>\n",
       "      <td>ZypUkorMqD7THle9</td>\n",
       "      <td>准备更新一下换了大屏导航以后收音...</td>\n",
       "      <td>大</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2751</th>\n",
       "      <td>ZYrXDv38lRpEL7jn</td>\n",
       "      <td>看说明书 后排座椅这个地方比较隐...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2752</th>\n",
       "      <td>Zz3EQdlrNtUfWwbJ</td>\n",
       "      <td>连续三声滴滴滴打不开，长按后备箱...</td>\n",
       "      <td>可以</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2753 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            content_id              content sentiment_word\n",
       "0     01htTx8jcsqp3CYr  XV新款低配比以前低配配置方面高...               \n",
       "1     03wYj5KyOnLcmbHe  助力跟着发动机走？很奇葩啊！最起...             奇葩\n",
       "2     05JqRAdHXSsbxWvr  只有报警，如果你想检验一下可以把...              有\n",
       "3     0653EkFWJzSYsP7w  不太好判断你描绘的声音，后座安全...              好\n",
       "4     06H4OMfwaG1xoqFb          前雷达撞了感觉作用不大              大\n",
       "5     06iTfdx8yo1OmEZq  这消耗对得住12FB25，我群里...              有\n",
       "6     0aKUsYjmWXVMlc1w  召回跟变速箱硬件没关系不是质量问...             问题\n",
       "7     0aNFWEteHpDCxqj2  增强车辆尾部操控，我是防倾杆和小...              强\n",
       "8     0aPkxUi8QZB49jAq  2.0的，国庆跑了一个来回高速，...             市区\n",
       "9     0AsYm8O6JBuqCh73  跑达喀尔的长城和民用长城有关系吗...              有\n",
       "10    0EAkf51SzTLG6ghq  要看你预算是多少了，漫步者也不错...             实惠\n",
       "11    0elCG69Ygt8Pn4hT   我也想改支持原来方向盘按键吗？多少米              少\n",
       "12    0evmp72SRax4Tquw  车有异响这是所有的车都会有的现象...              大\n",
       "13    0F8ytidf6PamDRGX   内饰可不敢第一，途观L冠道心里苦啊。             第一\n",
       "14    0futiKXa5Eon9OBI  电瓶头负级拆一下就好了。负级是比...             不抖\n",
       "15    0g58m1hpyZjdksD4     发动机仓里的跟漏油似的！                   \n",
       "16    0GaZuph6FqvTHYKg          杭州优惠1.5W左右。             优惠\n",
       "17    0goOzYUThjRQ5B2y  衡量一台车只看内饰太片面了，BY...             喜欢\n",
       "18    0gP1SuYKHVjDUZXN    16年2.5 95#综合由耗7.7               \n",
       "19    0IRxNGSPiKg71oY4  确实只有1万的合理差值，但为了尾...              大\n",
       "20    0iSwPD64tGvsAogc  自己去论坛里找，两例2.0烧机油...               \n",
       "21    0IVPXJjFT6mMQvR8  森林人这车就是用020的油。。我...               \n",
       "22    0kZfMDNlHSXTwoVb  森林人我感觉空调一开是全功率在吹...             空调\n",
       "23    0L9oFv53GRgAKxZO  安索油的长效指标是最高的，油环少...              有\n",
       "24    0lY1N5m9ForC4sjw  斯巴鲁在国内卖的贵配置还低，主要...              高\n",
       "25    0nIAjDeH84L1uSkb  谢谢解释，是不是可以简单理解为：...             市区\n",
       "26    0nPKSdI4lmUEv6u3  13款fb20发动机 6速手动 ...             市区\n",
       "27    0oj9xOc1bdiVTDmI  有米必须上XT，加速感受比2.5...              强\n",
       "28    0pGMcY4nAIeV2xqW  估计最早18年10月，做小白鼠也...             足够\n",
       "29    0PkqaRMTlJwYhgH7  确实是一分价钱一分货！所以在纠结...               \n",
       "30    0qKQsSZMWBugPekR  终于有个一样感觉的朋友了，拿着森...               \n",
       "31    0RD2OSET4WNFQcYr  明天提车，18款premium ...              差\n",
       "32    0rdH5sbR7CteoOUW  讲操控还要在山路驾驶上比，高速大...              强\n",
       "33    0SpZ2tHfBg8dLkRc  我也觉得森林人的味道小，两三个礼...             没有\n",
       "34    0SwhJdO4HzX3MF6s  你好哥们，我这车也是xt，三年了...               \n",
       "35    0sx7Mr5RtQaT1Obg      音响是建伍品牌，不知真假                  \n",
       "36    0T2YuZpFsABCybz7  就我觉得新款外观无比平庸，远远不...             平庸\n",
       "37    0tVHv4SZnlkuoIwg  变速箱是硬伤，我的2.5加速不够...             不够\n",
       "38    0U5V2ablhdEfK4GF  18款2.0一开始加的92，跑了...              差\n",
       "39    0UjRrg1oJKOlsDZy  英镑大跌，英国的经销商完全可以提...              大\n",
       "40    0uzRerwNi3KJ7L1F  真好，放弃蔚揽，就是觉得舒行的配...               \n",
       "41    0veEh8GcN4oBMRO2  途观没开过，SUV简单对比，我开...             厉害\n",
       "42    0vwSAdLsmoXuOf7U  换新款 CVT加自动启停 市区才...             市区\n",
       "43    0w4F1sHvy7JLnVDl  欧蓝德早就国产了，广汽三菱欧蓝德...               \n",
       "44    0wDgjI7Gt1a9iHFm  你的耐用性怎么定义？2000转持...              大\n",
       "45    0xMzRKSWuwc6QOik  这车不睬刹车一样也能打着，懒得踩...               \n",
       "46    0y5iDkRJPxa3BvXw  个人认为模拟档位只是提高驾驶乐趣...              有\n",
       "47    0Z6xEOVphIgLod4X    力狮空间比森林人大很多。。                  大\n",
       "48    0Zh2ndUCFcuGq5vM  妥妥的，7月份开2.0时尚进藏，...             问题\n",
       "49    0zLr27HtD5AelUq4  谢谢，我看了下18的内饰确实也很...            很一般\n",
       "...                ...                  ...            ...\n",
       "2703  ZE563haGl2TrmUzK  这个我同意，高配好像是白黑拼色是...               \n",
       "2704  ZF5AeGsPEr3VKv4W  轮毂的J值看到有的人说是可以上2...             问题\n",
       "2705  zfepvd2rBiIMOqS1   轮胎动平衡做一个就好，刹车抖换刹车盘              抖\n",
       "2706  ZFyWCP6YujnMsabq     听说2019款森林人噪音有改善。              有\n",
       "2707  Zg1uknB2VEqXa94t  是导航音质都不会好到哪里去，除非...             原厂\n",
       "2708  Zi7Vklfocs4dB9Hr  对于车内异味的话，这个天正好开窗...             好开\n",
       "2709  zI9S3bKVMRdAoy1h  其实内饰也可以猜到大概什么样，比...              大\n",
       "2710  ZIjeybOiWSJA3UGc  好复杂，4S一个师傅说估计是半轴...             问题\n",
       "2711  zIxqSwLQAGsVrdn1  XT上sport模式猛踩推背感还...              大\n",
       "2712  ZJ3ajfVXhO4rPdoB        没有异响就不是正宗斯巴鲁？             没有\n",
       "2713  zjemayo25vJDESuH  我的森林人混合动力，机油汽油一起...             问题\n",
       "2714  ZkISbOqh6ixQAz7F  个人建议搞个废旧手机整个流量卡插...              旧\n",
       "2715  ZkpROl7deciBtQVX  对 我就是因为最近跑上路多 下山...               \n",
       "2716  zkxTYaZmtwLS769u  是的，我开起亚K4的刹车很灵的1...               \n",
       "2717  zL1doOK2Rs4Tu7Sc  你好，我想请教你一下，同样13款...             没有\n",
       "2718  ZL4GrBCOMT72atl6  谈不上什么真爱。车子只不过是个生...              大\n",
       "2719  ZL70Sl1THNmxjPw3  原来是要把镜片外沿那部分打磨啊，...            一点点\n",
       "2720  zLcmh0s9f7tuvr2y  我的也是。有人分享过，是加密了。...              有\n",
       "2721  ZlfKXOg1c3nUd4qN  XV刹车行程比森短，那就是在刹车...               \n",
       "2722  zm3aPt6bvZuygGj5  新车按说明书推荐的用，不会错。特...              差\n",
       "2723  ZN19DMsRqn0f2jLr  不用热车，那个破CVT有热保护，...              大\n",
       "2724  ZnDUOSW6GN4aeuCM  这内饰比现在好多了，换代森林人也...              差\n",
       "2725  zNOpw2XDJxFkCofv  质保期内可以索赔，换了新的后，使...             可以\n",
       "2726  ZnUxmYN7lhCHrsGy  买个静音计划中控降噪音条或者国产的塞。             噪音\n",
       "2727  ZOAU96PMEn4wzyRr      这导航设计的人脑袋。。也真是的               \n",
       "2728  zoEWAY2iHd0phLbS  我还没买呢，就是因为听他说这个价...               \n",
       "2729  zoYdIOEQygqhVDnX  都是这样的，顶棚内饰与前风挡是有...            一点点\n",
       "2730  ZPgaN2jw9A4etqWo  倍耐力是不是比较硬 胎噪比较大？...              大\n",
       "2731  zq8NBYC3DZwhmgi7       外地买，优惠两万差不多                 差\n",
       "2732  ZQha9uyENtGiXSlk  晕，粘度高最多费点油，有什么不好...               \n",
       "2733  ZQuHmL51yMjfhdNT  我估计新车定价不会很高，本来这个...               \n",
       "2734  ZrHqi0s6zK4kAGbf  这样把你觉得我比4S贵没问题，我...             问题\n",
       "2735  ZRIkFXsrlfqQyBnd   那油耗还可以，我开车大部分时间很温柔              大\n",
       "2736  zrQwD9Vt0qfFACLi  说7.8.9那种油耗的，要么每天...              高\n",
       "2737  ZsDzqcSI4W32yEb6  傲虎舒适度好，森舒适度差点。还有...             差点\n",
       "2738  zSfR6q8UkdY1NrCj  业内人士都知道，国内的原材料加工...              高\n",
       "2739  ZsyGQ2DXnLgWKClO  到店了？坐标哪里？深圳的说还没有...              旧\n",
       "2740  ZsYPd6yQ95V1xWj8  37万，希望我也能开到！一年3万...              差\n",
       "2741  ZUcv0FYIMAVkOdjz  这尼玛不是你风格啊，是个男人带把...           全时四驱\n",
       "2742  zUhjVLOYFpIsDkxl  发动机自动启停对发动机没多少伤害...              省\n",
       "2743  ZuPb8RNdroEIavy9  哎呀，别和经销商赌气，现阶段买x...               \n",
       "2744  Zv51Qg8l4oG7XWNK  我跑3500公里长途，有一点消耗...             市区\n",
       "2745  Zv5DU4JOuFnpc6MT     最多10秒 现在的发动机无需热车               \n",
       "2746  ZVsWUvKBeTg7iGDo  2.0对油耗不是特别在意，开的不...               \n",
       "2747  zWLKO0VFxmvkS6RQ  我只黑庞大后换的那个垃圾大屏。外...              大\n",
       "2748  ZX3jmBtvdNYzpOiq    全系标配es会不会价格上去                   \n",
       "2749  ZXKbJ5SifwD6pdmu        用ephone刹车皮3个月               \n",
       "2750  ZypUkorMqD7THle9  准备更新一下换了大屏导航以后收音...              大\n",
       "2751  ZYrXDv38lRpEL7jn  看说明书 后排座椅这个地方比较隐...               \n",
       "2752  Zz3EQdlrNtUfWwbJ  连续三声滴滴滴打不开，长按后备箱...             可以\n",
       "\n",
       "[2753 rows x 3 columns]"
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7fdbc18bb080>"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4UAAAE7CAYAAABqjySMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xm8HFWZ8PHfk4VAECKEsIkQVDYVEI04zoiA4gjjMvoqIjI6rrwuyKCI4juDIIKiwIijuKC4izAjiguuiCiOOAhugICMEFERRCERwhbgef84p3Mrnb739r23w13q9/18+lPpqlN1qyvdVec5a2QmkiRJkqR2mjXZJyBJkiRJmjwGhZIkSZLUYgaFkiRJktRiBoWSJEmS1GIGhZIkSZLUYgaFkiRJktRiBoWSJEmS1GIGhZIkSZLUYgaFkiRJktRicyb7BNaWTTbZJBcvXjzZpyFJkiRJk+LSSy/9c2YuGi3djA0KFy9ezCWXXDLZpyFJkiRJkyIifttPOpuPSpIkSVKLGRRKkiRJUov1HRRGxPYR8fWIWBERf4yIsyJiq8b2vSMih3md0nWsnSLi3IhYHhHLIuKciFjc42/2lU6SJEmSND599SmMiAcB5wEJHA2sD7wReGxE7JKZdwIb1eTvBK7pOsSvGsd6CHBhfXsCMBs4AvhBPdaysaSTJEmSJI1fvwPNvA54KLBzZl4OEBHLgFOAZwNnMRQUfjYzrxzhWEcDC4G9M/OCeqxrgDOBQ4Fjx5hOkiRJkjRO/TYffTywtBMQVr+uy8V1uXFd3jTcQSJiLnAAcHUn0KvOBpYDB40lnSRJkiRpYvoKCjPz+Zm5bdfqR9Xl7+pyI2AlcFBEXB8R90TEZRGxX2OfXYANgYu7jn8v8Atg+4hYOIZ0kiRJkqQJGPPooxGxRUQcCLwV+A3wlbppI2Au8Gbgg5Tmn1sBX4mIHWqaxXV5Q49D31iX24whnSRJkiRpAsYzef1HgWdQArbnZObtdf1S4FzgtZl5PUBEXAd8njJAzCuB+TXtyh7Hvacu548h3Woi4mDgYICtt9667w8kSZIkSW01nnkKjwcOB+6jjAS6K0Bmvjszn9kJCKvz6rLT1HRFXS7ocdwFjTT9pltNZp6WmUsyc8miRYv6+jCSJEmS1GZjDgoz86LM/HfgeZQmoyONAnpbXXYGobm2LjftkbYTxS0dQzpJkiRJ0gSM2nw0IjYBfgh8OzMPbWy6oi53qIO+fB84LzMPa6TZsi5/X5eXU0YP3aPrb6wH7AZcmZm3RsRt/aTr4/MN1OIjz32g/+S4LD3hGZN9CpIkSZKmiX5qCm8BNgGeExHzGut3r8vf1DSbAi+IiHUbaV5Yl1+HVaOHfh7YMiL2aaR7LjAPOGMs6SRJkiRJEzNqTWFm3h8R7wJOAr4XEWcCGwCHUAZ9OT4zMyKOp0xmf0FEnAFsW9NcAXyocch3APsDZ0XEScBsyoilvwPeP450kiRJkqRx6neewpOB5wNBCdZeT5lDcPfM/FFN8z7KCKMbUALIFwGfAPbMzDsax7oBeBLwY8q0FkcA5wNPzszlY00nSZIkSRq/vqekyMyzgbNHSXM6cHofx7qKMq3FQNJJkiRJksZnPFNSSJIkSZJmCINCSZIkSWoxg0JJkiRJajGDQkmSJElqMYNCSZIkSWoxg0JJkiRJajGDQkmSJElqMYNCSZIkSWoxg0JJkiRJajGDQkmSJElqMYNCSZIkSWoxg0JJkiRJajGDQkmSJElqMYNCSZIkSWoxg0JJkiRJajGDQkmSJElqMYNCSZIkSWoxg0JJkiRJajGDQkmSJElqMYNCSZIkSWoxg0JJkiRJajGDQkmSJElqMYNCSZIkSWqxvoPCiNg+Ir4eESsi4o8RcVZEbNWVZqeIODcilkfEsog4JyIW9zjWQNNJkiRJksZnTj+JIuJBwHlAAkcD6wNvBB4bEbtk
5p0R8RDgwrrLCcBs4AjgBzXNsnqsgaaTJEmSJI1fX0Eh8DrgocDOmXk5QEQsA04Bng2cRQkWFwJ7Z+YFNc01wJnAocCx9ViDTidJkiRJGqd+m48+HljaCQirX9fl4oiYCxwAXN0J4KqzgeXAQQCDTidJkiRJmpi+gsLMfH5mbtu1+lF1+TtgF2BD4OKu/e4FfgFsHxEL10I6SZIkSdIEjHn00YjYIiIOBN4K/Ab4CrC4br6hxy431uU2ayGdJEmSJGkCxjMlxUeBM4C7gOdk5u3A/LptZY/099Tl/LWQbjURcXBEXBIRl9x8880jfghJkiRJ0viCwuOBw4H7KCOB7gqsqNsW9EjfWbdiLaRbTWaelplLMnPJokWLRvwQkiRJkqT+Rx9dJTMvAi6KiAspff6OpYwUCrBpj1060dlSypQWg0wnSZIkSZqAUWsKI2KTiLgqIv6ja9MVdbkDcDllVNA9uvZdD9gNuDIzb10L6SRJkiRJE9BP89FbgE2A50TEvMb63evyN3VU0M8DW0bEPo00zwXmUfogMuh0kiRJkqSJGbX5aGbeHxHvAk4CvhcRZwIbAIdQBn05viZ9B7A/cFZEnATMBt5MmbLi/Y1DDjqdJEmSJGmc+p2n8GTg+UBQgrXXU/oT7p6ZP6ppbgCeBPyYMl3FEcD5wJMzc3njWANNJ0mSJEkav74HmsnMs4GzR0lzFfCMPo410HSSJEmSpPEZz5QUkiRJkqQZwqBQkiRJklrMoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWqzvoDAidoyIcyPitohYERHfjoidG9v3jogc5nVK17F2qsdaHhHLIuKciFjc42/2lU6SJEmSND5z+kkUEQuBC4D1gROB2cCbge9ExPaZ+Vdgo5r8ncA1XYf4VeNYDwEurG9PqMc6AvhBROySmcvGkk6SJEmSNH59BYXAK4DNgAMz80yAiFgJvAN4MXAqQ0HhZzPzyhGOdTSwENg7My+ox7oGOBM4FDh2jOkkSZIkSePUb/PRJ9TltxvrLq3LHety47q8abiDRMRc4ADg6k6gV50NLAcOGks6SZIkSdLE9BsUfhE4Cri1sW7LurylLjcCVgIHRcT1EXFPRFwWEfs19tkF2BC4uHnwzLwX+AWwfW2q2m86SZIkSdIE9NV8NDM/13wfEXOA1wAJfLmu3giYS+lreCoQ9d9fiYhHZ+bVwOKa9oYef+bGutxmDOn+0s/5S5IkSZJ667dP4SoRMQv4EPA44D2Z+dO6aSlwLvDazLy+pr0O+DxlgJhXAvNr2pU9Dn1PXc4fQ7ruczsYOBhg66237vszSZIkSVJbjWmewoiYB5xBCfA+AhzZ2ZaZ787MZ3YCwuq8unxUXa6oywU9Dr+gkabfdKvJzNMyc0lmLlm0aNFoH0eSJEmSWq/vmsKImE+pCdwLOC4zj+pjt9vqsjMIzbV1uWmPtJ0obimlWWo/6SRJkiRJE9BXTWFtMno2sCeleehRXdsXRsTl3ZPUMzQYze/r8nLK6KF7dO2/HrAbcGVm3jqGdJIkSZKkCei3+eihwL7A0Zn5oR7bb6HU6r0gItZtrH9hXX4dVo0e+nlgy4jYp5HuuUCnaWrf6SRJkiRJEzNq89FaO/evwDLghoh4aVeS2zPzCxFxPHAKcEFEnAFsCxwCXEEZmKbjHcD+wFkRcRIwmzJK6e+A948jnSRJkiRpnPrpU7gZsEn998d6bP8t8IXMfF9E3A68ETiJMqfhJ4C3ZuYdncSZeUNEPAk4GXgrpf/g+cBhmbl8rOkkSZIkSeM3alCY
mUspcw6OKjNPB07vI91VwDMGlU6SJEmSND5jmpJCkiRJkjSzGBRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUov1HRRGxI4RcW5E3BYRKyLi2xGxc1eanWqa5RGxLCLOiYjFPY410HSSJEmSpPGZ00+iiFgIXACsD5wIzAbeDHwnIrbPzL9GxEOAC+suJ9Q0RwA/iIhdMnNZPdZA00mSJEmSxq+voBB4BbAZcGBmngkQESuBdwAvBk4FjgYWAntn5gU1zTXAmcChwLH1WINOJ0mSJEkap36bjz6hLr/dWHdpXe4YEXOBA4CrOwFcdTawHDgIYNDpJEmSJEkT029Q+EXgKODWxrot6/IWYBdgQ+Di5k6ZeS/wC2D72gR10OkkSZIkSRPQV/PRzPxc831EzAFeAyTwZWDbuumGHrvfWJfbAIsHnO4vI5+5JEmSJGkkY56SIiJmAR8CHgecmJk/BebXzSt77HJPXc5fC+m6z+3giLgkIi65+eabR/wckiRJkqQxBoURMQ84A3gl8BHgyLppRV0u6LHbgkaaQadbTWaelplLMnPJokWLhvsYkiRJkqSq39FHiYj5wLnAXsBxmXlUY/O1dblpj1070dlSSnPTQaaTJEmSJE1Av/MUzqKM/Lkn8NrM/FBXksspo4Lu0bXfesBuwJWZeWtE3DbIdP19REmSJEnScPptPnoosC9wdI+AsDMq6OeBLSNin8am5wKdJqcDTydJkiRJmpjIzJETlNq56ym1im8C7utKcntmfiEitgR+CQRwEjAbeDOwDNg5M5fX4w003XCWLFmSl1xySR+XoH+Ljzx3oMdbW5ae8IzJPgVJkiRJkywiLs3MJaOl66f56GbAJvXfH+ux/bfAFzLzhoh4EnAy8FZKv8DzgcOaAdyg00mSJEmSxm/UoDAzl1Jq60aVmVcBo1ZTDTqdJEmSJGl8xjxPoSRJkiRp5jAolCRJkqQWMyiUJEmSpBYzKJQkSZKkFjMolCRJkqQWMyiUJEmSpBYzKJQkSZKkFjMolCRJkqQWMyiUJEmSpBYzKJQkSZKkFjMolCRJkqQWMyiUJEmSpBYzKJQkSZKkFjMolCRJkqQWMyiUJEmSpBYzKJQkSZKkFjMolCRJkqQWMyiUJEmSpBYzKJQkSZKkFjMolCRJkqQWmzPZJ6AWO2bBZJ9Bf45ZPtlnIEmSJK011hRKkiRJUosZFEqSJElSixkUSpIkSVKLjTkojIjtIuK+HutfFhE5zOuwrrR/GxHfj4jbI+LPEfGpiFjY45h9pZMkSZIkjU9fA81ExCzgEcBjgePoHUxuVJdvBG7t2nZx41i7AucDNwJHAZsChwOPjoi/ycyVY0knSZIkSRq/fkcf3RS4epQ0naDww5l55wjpTgRmA0/LzGsAImIZcAJwIPDpMaaTJEmSJI1Tv81HbwH2q6/LhkmzMXDbSAFhRGwO7AN8txPoVR+vy4PGkk6SJEmSNDF9BYWZeU9mfjMzv0kJEHvZCPhLRLw9Im6KiLsj4qKI2L2R5u+AoNGctB7/ZuB64IljTCdJkiRJmoBBjj66EbAYeAGlieeJwG7AtxuDwyyuyxt67H8jsEFEbDyGdJIkSZKkCei3T2E/rgRWAK/IzOUAEXE78C7gNZQBaubXtL0GibmnLuePId1qtZYRcTBwMMDWW289rg8hSZIkSW0ysJrCzHxjZj6/ExBW59Xlo+pyRV0u6HGIBY00/abrPofTMnNJZi5ZtGhR/ycvSZIkSS21tievv60uO009r63LTXukXQQsz8xbx5BOkiRJkjQBAwkKI2KX
iLg8Io7o2rRlXf6+Ln8EJPDkrv23BTav28eSTpIkSZI0AYOqKbwe2AF4YUREY/0L6/LrAJn5J+BbwO4RsWMj3YF1ecZY0kmSJEmSJmYgA81k5rKI+ABwGPCNiPga8FjgZcB3gS81kr8F2Bv4VkScQmkiejhwKXDmONJJkiRJksZpkH0KD6cEctsB/06ZfP5E4FmZeX8nUWb+khLsLaWMSHowJch7embeO9Z0kiRJkqTxG3NNYWbuNcz6+4H31Ndox7gI2HNQ6SRJkiRJ47O2Rx+VJEmSJE1hg5y8XtIk2vlTO0/2KYzqsn++bLJPQZIkSV2sKZQkSZKkFjMolCRJkqQWMyiUJEmSpBYzKJQkSZKkFjMolCRJkqQWc/RRSepy5Y47TfYp9GWnq66c7FOQJEkzgDWFkiRJktRiBoWSJEmS1GIGhZIkSZLUYgaFkiRJktRiBoWSJEmS1GIGhZIkSZLUYgaFkiRJktRizlMoSVqrTn31+ZN9Cn153YefMtmnIEnSpLCmUJIkSZJazKBQkiRJklrMoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWsygUJIkSZJabMxBYURsFxH3DbPtbyPi+xFxe0T8OSI+FREL13Y6SZIkSdL49DV5fUTMAh4BPBY4jh7BZETsCpwP3AgcBWwKHA48OiL+JjNXro10kiS1yckHPHOyT2FUh5/1tck+BUnSGPQVFFICsqtHSXMiMBt4WmZeAxARy4ATgAOBT6+ldJIkSZKkceq3+egtwH71dVn3xojYHNgH+G4ngKs+XpcHrY10kiRJkqSJ6SsozMx7MvObmflNSoDY7e+AAC7u2u9m4HrgiWspnSRJkiRpAgY1+ujiuryhx7YbgQ0iYuO1kE6SJEmSNAGDCgrn12WvwV/uaaQZdLrVRMTBEXFJRFxy8803j3rSkiRJktR2gwoKV9Tlgh7bFjTSDDrdajLztMxckplLFi1aNOpJS5IkSVLbDSoovLYuN+2xbRGwPDNvXQvpJEmSJEkTMKig8EdAAk9uroyIbYHN6/a1kU6SJEmSNAEDCQoz80/At4DdI2LHxqYD6/KMtZFOkiRJkjQx/U5e34+3AHsD34qIUyhNPw8HLgXOXIvpJEmSJEnjNKjmo2TmLylB3FLgOOBgSvD29My8d22lkyRJkiSN35hrCjNzrxG2XQTs2ccxBppOkiRJkjQ+A6splCRJkiRNPwaFkiRJktRiBoWSJEmS1GIGhZIkSZLUYgaFkiRJktRiBoWSJEmS1GIGhZIkSZLUYgaFkiRJktRiBoWSJEmS1GIGhZIkSZLUYgaFkiRJktRiBoWSJEmS1GJzJvsEJEmSJsPvj7xwsk+hL1udsMdkn4KkGc6aQkmSJElqMYNCSZIkSWoxg0JJkiRJajGDQkmSJElqMQeakSRJ0oQdc8wxk30KfZku5yk9kKwplCRJkqQWMyiUJEmSpBYzKJQkSZKkFjMolCRJkqQWc6AZSZIkaYr57vkPn+xT6MtTn/KbyT4FDYA1hZIkSZLUYgMNCiNi74jIYV6nNNLtFBHnRsTyiFgWEedExOIex+srnSRJkiRpfAbdfHSjunwncE3Xtl8BRMRDgAvruhOA2cARwA8iYpfMXDaWdJIkSZKk8VtbQeFnM/PKYdIcDSwE9s7MCwAi4hrgTOBQ4NgxppMkSZIkjdOgg8KN6/KmXhsjYi5wAHB1J9CrzgaWAwcBx/abbqBnLkmSJGnG2fx7P5/sU+jLjXs/ZtL+9qAHmtkIWAkcFBHXR8Q9EXFZROxXt+8CbAhc3NwpM+8FfgFsHxELx5BOkiRJkjQBa6P56FzgzcCpQNR/fyUiHg0srulu6LHvjXW5zRjS/WXCZyxJkiRJLTbooHApcC7w2sy8HiAirgM+Txkk5vs13coe+95Tl/Prq590q4mIg4GDAbbeeuuxn70kSZIktcxAm49m5rsz85mdgLA6ry4fBayo/17QY/fOuhVjSNf990/L
zCWZuWTRokVjO3lJkiRJaqFB1xT2cltdbgxcW/+9aY90nShuKZB9ppMkSZIkTcDAagojYmFEXN6cpL7asi5/D1xOGT10j6591wN2A67MzFvHkE6SJEmSNAGDbD56C6Vm7wURsW5j/Qvr8ut19NDPA1tGxD6NNM8F5gFnwKpRRkdNJ0mSJEmamIE1H83MjIjjgVOACyLiDGBb4BDgCuBDNek7gP2BsyLiJGA2ZYTS3wHvbxyy33SSJEmSpHEaaJ/CzHxfRNwOvBE4CbgV+ATw1sy8o6a5ISKeBJwMvJXSf/B84LDMXN44Vl/pJEmSJEnjN/CBZjLzdOD0UdJcBTyjj2P1lU6SJEmSND4DnZJCkiRJkjS9GBRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUosZFEqSJElSixkUSpIkSVKLGRRKkiRJUotN+aAwInaKiHMjYnlELIuIcyJi8WSflyRJkiTNBHMm+wRGEhEPAS6sb08AZgNHAD+IiF0yc9mknZwkSZIkzQBTOigEjgYWAntn5gUAEXENcCZwKHDs5J2aJEmSJE1/U7b5aETMBQ4Aru4EhNXZwHLgoMk4L0mSJEmaSaZsUAjsAmwIXNxcmZn3Ar8Ato+IhZNxYpIkSZI0U0zloHBxXd7QY9uNdbnNA3MqkiRJkjQzRWZO9jn0FBEvBj4NHJeZR3Vt+wzwT8AemfnDxvqDgYPr2x2Aqx+g052ITYA/T/ZJzCBez8HxWg6W13OwvJ6D5fUcHK/lYHk9B8vrOTjT5Vpuk5mLRks0lQeaWVGXC3psW9CVBoDMPA04bW2e1KBFxCWZuWSyz2Om8HoOjtdysLyeg+X1HCyv5+B4LQfL6zlYXs/BmWnXcio3H722Ljftsa0T7S59YE5FkiRJkmamqRwUXk4ZZXSP5sqIWA/YDbgyM2+djBOTJEmSpJliygaFdZTRzwNbRsQ+jU3PBeYBZ0zKiQ3etGruOg14PQfHazlYXs/B8noOltdzcLyWg+X1HCyv5+DMqGs5ZQeaAYiILYFfAgGcBMwG3gwsA3bOzOWTeHqSJEmSNO1N6aAQICJ2BE6mNCNN4HvAYZm5dDLPaywi4hHAw4GfZeafxrjvc+q+P8rMi9bG+c1kEfFQYGVm3jhq4paJiDnAg4A7MvOeyT6fqS4iAphdWzFIky4iFgHrAiuB+yjPyI7ZwFzg/vp+M+DuxnuA2zLzD13HHO8AdPflVM9QjCAi1vE++MCKiO0o38dr+/3uRMQWwF96/V9FxBaZ+ccBn+aUExHzgMcAP8nM+7u2HQQ8ETgyM28f5/FnMXJLwvu7/66KiNgAmJuZt0z2uYzHlG0+2pGZV2XmMzJzw8xckJnPmU4BYbUE+Cbw8nHs+3BKLemjB3pGU1hEPCwiXhERD+mx7aMR8daI2LDPw/2IMrVJ93HmRcRvIuLkiZ7vVFULVEayO3ArcECfx9s6Io6JiMdM+OSmp0OAlRGxbnNlRNwWETnM6/peB4qIfSPiTcN9jyNi84h4dUQ8bS18jgdERCyc7HNogbcDV9bX74DrgWuAm+u6K4CPAwdS7oUX1eV/Az8H3tc8WA0yV47z9Za19inXoojYMiIuB04dwLEWRMTlEfG+iHjQGPd9UETsExFbj5BmfkS8MCJmSn7g08DFYyxMOAX4fURs3mPbDyPivyNi9mBOb8r6G+DHlLxhtz2A11EKhXqKiHUjYp1a0NnLuYz8W/+v8Z/61BcRm0XE5yLi7WPcbwPgF8BnOtc2Itarv+111sa5DtqUDwqnsohYGBGPi4hHRsSO9bVrRDy2K+kddXnFOP5MZzCdK8d/ptPOPwIf
A17ZXBkRO9V1x1BKwPvxZ3rPIfOPwMOA68Z9llNYROwH/CoivtgruK4638s7+zzsJsDRwPaj/O2FEfH6iLgoIh7c57Gng7uAFZl5V9f624FjgS26Xicz/LV9FnAiq9fsNG0MfIjycJ92aknzjyLim7VUu7N+g/qQHC4zMp6/tWVEPDUijoyIcyPivSOknVEZ78x8bWY+KDM3As4EPkgdnC0zN64Fqftm5smZuV59v3FmLqzpu7/LnXvCyzIzMjOAs4ArGu9fXNNs21h3H/3fR6aaPwLrAft3F/iMw7HAIykFSGMdpn4b4DvAc0ZI82DKWAvPHdfZTT13Ab/tN3Gtxd4H+FJ365+I2J7yTD8nM+8b6FlOPftQgrMTe2zr/A67f9tNx1FbDXQVYv61br8HuBjYrsfr53XfmewWyn30kIiY3+9OmXkb8APgHxh6dh8N3AbcPULh8ZsG/QHGayrPUzgdPI1yg+52E9AsxepUs988jr/R2Xe6PnDHY39K5uQ/utYfTclE300pLXwxo7ubcoPr9ua6fFlE/FPXtuMy82v9n+6UdAHwfuD1wD4R8erM7B6c6d6u5RpqUPcwyvdvq7p684jYlTLg0x+ArSkPi12Bv6NkhmYDvwf2As6Z+MeZEu6j97W6n9IMrzuTcjvDB32d/tAratqNWX36nfspU+7s0aPG977MvGZsp/6Aewml8GBj4HkMDQx2IeV7Qo+48CfAEzq1BhHxYmDDzFxVgxMRHwO2BTagTE20GSVDD6Wv+R+B+RGx6TBN9TsZ739hzftLRyfj/TbKKNhTWiOQmV1f63Stv3eEJs/d6zsZoE9ExCe6/k73d/m6rv/D9ZjCIuJvGb4g/GJKy4knD1O7f3tm/n6U4+9Lud+eAmwEnBERjxlDl5G7upad455Euce+aLg009j9lEK1fj2Lck+5NSI6hca/y8xvUQqM7wA+2r1TLYRal9Jl6o7u7dPQ8ynBx4KIaM7lfR3lOUVj2csJwAco+aOVdd3bgCfXf68E7szM/+3eMSLuHuXY00Kt1RuuwBzgbOCplHvC0mHS/LpHM9o3Ugp23hQRH6YE7qdS8qHd1202cCNTKMg2KJyYL1EyECsy895641kPmFebNjyUkunrZKa36MpDy0TjAAAdLklEQVTg3dScViMinkDJfN/LUDC4RV0+NCJW9DiHzMyrB/aJJllE7ExpD/+xrmuzB/AC4JPAD4HTI+JrmXlWj2M8hJJhvIsSuGwQETsAd2fm0ojYH3gcpSnVxxu7vqD+7R+sjc/2QMrMO4F/iYivUDK//zPOQz2B8j2/izLgE8Dx9d/zgNdQarSuAdavr/0o/Wd71dBOO7XW9UGUYGadiPhHSj/Vr9ckDwJOjIhepba/G+awdwA0HigHMXyQ0t1K4C+UWtspKUo/of8APkz57pxVM3CvAp5dk93F0D3ufZSCoIO7mpE9hvIdviozv1vXXQUsBH5KqWF4CvAkSq1Vr/tjtxmV8a5NdLt/Z4fVZacg8e2U1hWjysybI2K4VhivomSiPk5pptptqmcUv025P43kW8Os/yKlcKOniFgC/CelYOOtlEKLfYDvRMQzM3O4+0BTJ2PYXYj5NGBWzWN0tq1kmojSr79nM/pGmuEKz16emc3Cif9LKVzfh1IbszPwloi4EHgZpVDj1hEaInwO6C4EnlaidCnYsb66nw2bdqXdgxI0L22ur8/mP3el3YryHIdGv+NamLIpcH5mdmoSp/pvvR97AV/pI903Rti2Ll0BXWbeEhGHAT+ohXF/qa81xFD/7SkTFJKZvtbCi1JqlaO8Xtm1z2jpe73+NNmfdcDX7TP1c+3eWLcu8CtK7cpmdd23KJm2PXsc47hhrtWZlCD+D/X1V2Cjus88Si3DqZN9DdbCNQ1K4DKW79UzGvsvqP8Hu9Rtz6UUfqzf+f+py48BSyf7866F63dJj+tzeWP7ZpSWAb1ei7qOtQuwE6U2ISmZmh2AV9T3W41yLh+j
POQn/boMc35bU0qrf9b4XuxECeBuB57X9Z15HCWD8foex5pNKbi5Dpg/zN87BrhxDOe3Vb3OL+1a/wvgsvrvzm/lsMm+nn18nnXqub6WUjjzaUrBVtZ73Q+BY4bZ95PAJxvv51EKgR5DaQK5Y9fre5TM4hO61u9Uv9ePn+zrMcq1uhk4bxz7/Rn43Ajb96U8S64FNm+sfwyl9voGYK8R9t+8/m46383DKV1NHlf/f++hFK7sCDy2pnlnff/I+ndGvG9M8nXfrOucm69rgS/3WP+ius/+jePs0lxHqTXMeu2OqP9+Zt2/06qo+R19HLD9ZF+PAVzP84FLutZ9Crih/vuk+tm3oxQeXEgpVOg+zhxqfqq+v6xzr6A0Gb+g/rvTzWG7+v7HzfvGdH1RagETeM449u08v6Ox7gPAzWM8zhx6xAKT+bKmcJyijIC1DSWj02yCM4eSgf4GMC8z74mI51M65u6YmVdHxBMpnf27R8R8MKV0976sbeIj4ljgKODhmXlt1zlcRu+mkdNSrc07sL79dWPTuyg39Vdl5k113YsoTX6+GhEHZeZXu9KfRKnZuYCSUTqYEsh8AdiQ8iA9l1Lr9VrKQ2Uh5cc+rdXa1icDp2fmXZmZEdGpNTiVcvOC8tD4CqX/S6cmZnfKA+aueqwHUTI2TV+sy7MpzVjmRsQ2lNLEB0XEm4HF9XV1Zr5hoB/wgfcmSgn0nsChlIzH/RGxGaWZWNOvgS0pgQWwqjbnt1lqb79NySR1/JJSwHFuI/0SSvOTEzLz9vq7eEwO1Yr33cfhgRQRu1AyeAsotcWd0s9r6vt3UmpOHgb8d+3790pKBuP93cfLzPsi4g2Uh/d9EbFtPeZ9lOAkKdciIqJTczqLoVE3l2edtqi23FiHoRLwhRFxBaWZ62WU+8u5tSVH5/puWt/Pqvv+OUdpQvhAq8+X+yk1z3MpBWe31W3LIuJeWNV8brOu3ddj9RqnLSgD0NzNmqOUQvl/XUnju1pF/dtBqSGbquZ1r4gy+MP3KPfFM3P4ERXXyCvV5rnHUJ4dvwGemo0m5Jn584h4EuV6fS8ivgq8LTN/3nWo11Nq0Per7/9Cuf5vA95NubZ7UwL8jkMpz7RZ9XO9m9KfcSrq5FH+lJlXNTdEGWhraY/1nd9zs2npcZSWF1+q759HKcxZQekO8v2s3T4i4iYoAxUO8HNMuojYm/Jd6O53+jfApV3rrqPcc99GKWjobslyOvDo+h2dRQmef1G3NWtuZ1PuBb+Z6PlPMWs0d695l62AN2Ud3bbW5r0Z+Hjj93035Xu3Lqv34VzZONY3KAVGvfx7Zh4+iA8xcJMdlU7XF6UJwkg1LS9tpO3UGnZqufajqzZshL9zOuUHuW6PbdcB35jsazHAa3pO4/o9uK7bv77/Wo/0O1Fq9+4H3gMs6Np+VN33kzXtbMqN8ZV1+7Pr9g9SAvvjJ/saDOg6frh+rhspTZk61/J+GrUGlBFtVyspozSpSODJ9X0Aj6A0hd6zbvtnSunsFpTAu/m9v5/SxO8/KRmmv5/s6zHA6/pSSt+izvt/7/G734ASNHevf3zdZz7lAfxvdf061BrXun0TSq3arcDedd3nKYHQiyb7GoxwbdajBH9/pPRl7f78FzTSzgL+H+UBuoJGafUIx9+8xzFHex3S2P94SvC3qqaQ0gT1y8Df1nXLGBqYKikZ0j9TBh1YQcnQT/q17nFt7qmf5zzgXzu/67rtgvo73GCYa3T2GP7Ob4BPTPbnncB1eg3wj13r9q3X4Ws0Sv270pwNvKPxfl1Kf/b/rfteRSlM25ahwrDmazfg+41rfnXzu0TpA3d+13fz9ZRmuu+sv/2FNe20qcVufL6e50wpREzgX3vs86S6ba/6fiElWG7+pp9HaUa6L+W+87TGtld3fgMz5UUplPkNpYnyXOCFdf0iynP3dfV9p6ZwTk33c0rAsmPX8Z5S7x0fAf6+7rN53dasKTyFUiDW2W+m1BQuoFQO
LKjv31avwS00avYpswYk5RnwLko/917HezeNllKUCoiLWbMW/GbKuBVgTeGMcg7lRnUbpSbheEoNwTJK7UGzhGsLyo+204a7MyJjdw1MLzsAf8g1RzyEcrNd3mP9tBMRz6aMCHo5dfqNiHgGpTnp/wKviIhZWUtyI2J9yvU/iHJTOwLYNyIen5l315LxzkA0T6cEl9tl5hGdv5mZX4mICyiZhRsoJZHTXma+OiK+TMkMvpMS8J7ImiX/I+mkXZdSOnsP5SYK5Tv3B0ogcBfwBkpzwddRCjpGmwpjpriDkql+fpSRKi+r6+4ATs7MN9VSxpUM9SG8A4YGWclSGtkpkVxAqbndmFLrcHH9Oy+h3DM+FRF/zSk4CFJm3ln7r8ylfJ5PUH5Pm1D6+tzXSHs/8M6IuAj4LKXQ4abm8WpNwZ0Mzb13E+X7111TeDQlA7gFpQCjM7/WXFZvwbEpaw709QlKhv2Z9XgPz8y/1Nrx24B/y8zp0HLgXkrhwhMo98JSmlOmjuk84zul2Xtn5gWjHTAizqcEy03zgG0i4sCu9Ulp3pvjOvsHSGZ+qMfql1J+n4cMd/6Z2d2X8IWU784VlGfMZ+ivz/axlO/hSymBYcdDgdUGo8lacx4RVwP/k5k9+yRNc0+vy3+KiE/lCLXw9Xe5J/C/EXE8pU/3MZ3tUUZ8X9moYVy/ru/ue/3XnL5zUZ5GKWQ4iPLMfXcdP6GTj1zjuZCZKyPi5ZTg5PSI2KOTh8rM8yPiCErQtytl0JROTVhzOos5DI2C3zHtZy7I0ork5xGxUUT8F6XV0yXA8zPzt410H6/jebwHOJIyOOFbMvNT3Yfsen8vZQ7o7lrw4QasmxIMCscpG5OCRsRzgQs7P6iIuC1XnzR0MaW9dydj1AkKRwzoosy1syulTXgvG1CbCk1ntQnPqZSH7OnAeykPys9QrtGzKP3YTo0yiuZHKdf0Qkpt4G6UUpoPZGanydq+lMzmzZRS2j0oQdLB9W9uTyn12YvSr+FhwKURcQrwlZzmk91n5jci4pvA/2Fio3++l9K5v+lLjX9v18k4Rxktcg0RsV6WppMzwayI+H+Ue+csyiBGO1JqCWAoKNm4rp/dWL+GiFhM+W5+jVJbsD3wzEZA2Hmwv4BSQntmLfiYilPU3EztA5WZN0UZ3nw+ZTCEdeuAE+cwNJgWlAEojm8MDHFtZr6coQFlAO7KzPUo/bZW0wiumzXVsGaz+pmc8U7KXKPrU5p/3koZ+fdndft5WQYpGcsx76cU+Gw1Srr/ogSaUzogjIjHU+6FdzFUQBGU58p1lMCk166dpsPzgNMy85rM/GTnO1PTfKPruB+lBNSPqu87zZnvroUn/0apjeh4JKvfU5t2o3x3Z5QoU9b8C6U1y7qU6Wv+ITOHHe23sy0i9mH16wflHrrGoHOsWRD0j/Q3uMhU9Hbg0sz8cURcQikEeg+lwOw7zUCmKTN/GhEfoFzvQ1h9QLP/oPwG9mT1rjPN59VmrFmBMSPmgYyIl1BGYt2CUtDzmkYecpXMPKsWtB9ZX5+MiD3rs2ok82PN0cOn9LUzKJygWkPweOrE9BHxLeqACo1kj2P1ksFOP6TRavn2oNQGrjEaZpQR4uYxtuGcp6Qs/WJeTHmwduZ2+h0lsFuRmVdFxLMoN6pOxq5Tc7q8BuCr5nOrtYTHUx7WC2vatwFzIuKfKf0R96E0R3k1pQRuP0qzi48AH4kyLPnjchqPoJmZGRFfzdXnbDo6Io7uSvqlETKMR1FqfFZSrtOzKc1Hv02pKfw9rJpSYSPKENnvpgTt21GCnE9QmkNNSxHxcEqp9kspn/kQSvPHXSnNbpoB2vya5kBKDVlzfed4G1OuDZRa8K9SasxOowx20uw7BEBm3lYLn57J0AhxU81ulKZN3dNNdL5/e1IKaO6kfJ/2YWjQHSjfkU6p6m6UwPEwGoUSEXEkpTBnNbHm6IWbdAV5MznjnZT+Zudm5g0Atd/m+vSoPejT
SkrNbD+FjtOhYPJRlMzcvZRn5n2UvuVzKM+Iwxpp51A++wpK4cI8SmD4TepvLzMvaqRfrTCh1gTkMK176FH4cDCl+WMvK4EHR5kC6B6G+kF1+rvOpQRV12RmPy2Ppoq3UPJFL6c0ez6f0s94/8z89ij73smaBUSdaSYeS8k7/DPleb6orn8EZdCqKT+i8HAy81eUAfeohTz/RCko3IWuOZ17OJZSw7haH/iaR/ga5d68cWNTc6L1bVlz/It+54me6h5cX6/MzNNHSlh/z8dExJmUcSp6tTzo9nim2RzjBoUTdwjlB9OZh+vHwL9FxHaZeU1EbEkZYfAdjX0eTClNH+0G9ZK67FXTs2FdzoQ5d+g0aYoyyEZn3cWNJJ0S685gO53SnF5B8espGb3DKCVpZObpMTRoT2cuo4/V/Xeox30+JRB9OvD76RwQNpwXEecx9P3rd6AZADLzZljVDOfv6+rbgC1rCeS36v9Z84FyOOXB/L+U38X3Bv2hHii18OUSym+2M6DH1jk079sbutIfTOkvtMbk0nVQhe9QMkOd0sKnZ51uISI+zTCZllqy/nvgIzn8nHOT7UrKZ+vMx9TJlL2MkrG+KjNXFXDVgTq2ysxjaqB8FKWmi6xD+EcZIKlZctupcd5pmHN4PuW73l0zPZMz3kHph/rpGBrivNMncrxDx99HyXjvMEq6z1AyPlPdGcBZnRYLtYnwUsrgJKs1D42Iwynf3Sdn5k9HOmjtxnBXjjJZev39rkMZRG616SQy85s1Ta9a2YWU38RdlN9Bp/CjM9DMOpTv5rMYfjqNKaW2ejiOUtj9yRqYPIVSYPS1Wnsz2oBOvZrqAdyamX+uzf3oPMMbzUina9PRXjamdFm6IIemR+opyzQJ22djmi9YNQDXkZR740si4ouZ+WVKX+s59b78GOBbUaYV+jglDzFT8p3/ERFfAh4VEbszVGDZyzrUUddpzKc7iu9n5l7NFRExpVuhGRROQEQ8ipLhOYaSgdiT0pF0FmUCy9dQhpqfRfmRdTyY0ZuObk/pr3B+d5vkqtO/a9rXFPZpO8qDoHsy1V4/zBdS+nr9oKvG4ljKA/Z5lNqH7maRHQflmhO9TztR5ovbgzIJbdZr8efO96mRgfxDY93mwxzuMIZG7zsFWFRvoisppby/oowSuTWlb9a0mUdrJLXp5qcoNQT3Ayd2grLaJO1uVp9X9L2Uvh4rGerjNqce6xcR8SdKrfVmwKE5NP8elIz9wlGa+X2ZNUeemxKyzBO4KhMdEbdRBtG5oo/dF1Nqn3s1lW/+xlfWv9VzVMHGA7dNGe+gNLsfrqT7gnEed0OGam6Hsz6lRm1K69GP7HDKc/htzZURMY/SzO7HfQSEc6jP316/2R6111D6tn9hDOd9I418Wky//q6riYgXUmpZlgMv62SsM/N3EfF0yu/wNZSBuDSMWqB2FuX3d0g/+/QICNelNP9eSWmh9Q3gtIi4qHbRISIOoTzDbgXel5kfoxQEzRj1u3cd/Tfr/ENmjtasftoyKByniNiU0uxrHUrm4Z2Um/X/UIaZf1FE/DtlAJTvZObPGrtvxAhBYe1j9xnKl/RtwyTr1MzMiIFm+rALcH2ffdNex5p9DqiZ+W9FmYwcYG6z1iUi9qLUak2diUQnphP0njCRg0SZbuIwyoiiB1CaOr6FMhHw7p02+FE6vW81UwLCjsw8DCAiXtpZVzOEF1MCibso99LONBRvp4wCeTvlNzyHUvK9Y2Y+q+7fK9OzJ6XvRuf712mu9nrKg3k9pnhJd73nddeedjLHh2Tmqb32qxnw/Xpt6046sTPs+bene8Z7FiWD/cnuDVEG0hp2UIhakHnFMK0i/kod9GsEn2Soyf+0UAO/AyjX5SO1r9CXM/PXlNrqh1IKc0dUm/A9nqH+hJ3v5rsozRif3kg+m5JXGHES9xaYT2lu+5LsmmKrtqz6B8o9b+fRDhQR61GeQT37ss9UtXb6C8DfUX73/RS6dR9j
PqU5/RMpo7beUGvIP0EpsPxTlKmUjqY8769nZuc1t6F8vrs7+Zd673xwZj6mvu902ep32p05DN+ncMrGXlP2xKaBzlDlG1N+oGdRShfvrf0M76PcsObQlUmq+/T8gdUH1hcozfhOycz/HubvP6IuuztSzzgRsRPwEPosYe0KwHsZrUnVeJtcTRn1pv9SyjyB426+WftnnkZpLnIKJTP1E8p0DE+kPORnShDdt/o7XzfLSLdzKbWl9zDUr253SsDc99xO3Q/3KHPM3T3NmjHfSbk37tFYtw5l/qtB9OeZA9DjQdvRqemey/DNgGaaWcBTazDb7SGMXAL+esqgKFv22LYhpRn4aKZVZrEWYj2yBnT7U2oG3xNl8JjtKfO7fqfPY13SvS4illNGx5xRc+QNQpaRHD/XazCPuv0S6F3z2mUWpUnwMyjNG0ezzVjOc6qqeaH/pPSR/ZdeBUF9HGMDStPdXYGXd/IHmXleRDwqM/9aA8+vUlrAvIVS0H5/4xh/A1yXQ/NGT2uZ+Yc+0qykPFP6bZ03n+H7FE7JuYbBoHDcMvP+2tH3hszs1en5i5Qf3Usz84pas/g0SinME1h9IloAokxS/XlKxvJcylQXze3PqPtvxlCTgV8w83UGKRlU061Oie4OdVCAjq0HdPyp4DWUZnEnda3fpJGh7oyY+ZDGuu5rcBylL+ErWf1m+HZY1WRwWFGGr//CTKs9hJK5rIU4Z1KajS+hjPh4JuUBekFE/J/M/MkknuYD7T5Kv6lVGeLaTKmzbaI6/f1G67y/LjOk30sf5lAyH70Gy1nE6tNS7FeDn6QEfX/H8H1+Z1KfwjXU3+VPal/eLzD0WV9YnwsfzMw2PF8fUMMFhGOwDmUU0QCel5n/HRHPHC5xRPyQMrn7nUyzQT+aIuI5lGfL3cBza9+/Xjq/99n0mPogy6BlnwM+0x1U1oDwYZSAe1dg38z8Y48g/RXAiyNiwQD+P2ekzPyHyT6H8TAonIBeJYER8TyGaggPyaG5TG6hNCnbiaFaxOZ+/0ypkVmHElAe2KPz+mKGBgkB+Gxt8jKTdDJ9ARARTwJeRakRbV6z6FoOp9f2TnOq4Ya/HtPY7VNNlPnujqBk/JrXbBalxO91Xbt8gDVFrW3cC/hmHahnVVOy0YLBeoBZlEFrDmCK9oMbo41YvbR0F8rnewRlwJjf1s8cwAsoBTs/jogzKJN+n9841mwaTSFrX85llJrBnk0ko0xRsyGlP/Fv++zo/kC7H5jdVZO3znCJR1JLq/+BUpBxQGaeRcms/Ch7jNBa91lAGV58WtVejVetpQ7ghBGaj3au/8cofene3EjyW2rBUUQ8rq67m9IUOhiaPmk4cyjTtDyq/p11MrOfOfsmVUQ8klJI+wJKTekdlIKu71MKYw8GDq4BxQcpfdTvaez/aMr1uZc1mzQvAOZ2/QY6c2euT6lF7FVQ1MzMd77LO1Bq2Dt/u1PDsGnj+J0+r1f0c1+eBOOZ025er5W1i8JjKZ/5JZn51bppHUrf1s79eW5NP4dyj/4xJb80nWu2vkpprfPJUWqh12ksh6uR7S4s7gzG8yrgrZQ86r6Z+f26+S7KffhplLzsU4FLpnNAWGtMt6IUdt/NmvM4z6U8y7rnugxq01Dg3hyaCmQuY59uonPsKfMsNygcvC9S+hf+JDO/0VlZm5u9jvKQ/WHWUR0bzqA8nFYAb8o6wWiX/6TUMv4M+G5m/nJtfIBJ1nnozYmIJ1Davc8BXtfVn7BT+zBa++7ZrPlD7bwfrk/hdP9dvJdSm/yDHBrFcTblZvb2bEz6261xDdbJzDsi4skMDWo0r2vZ7a+UWsd9KH3odqfcKPtqijVV1e/hlyk38M5cWc8BzqY0ldynkRGeDcyppbFPpQxu9AbKAEnnR5mX8DRKk6frGn/mx9QmTl2lsk+oBUbd1mNqDq8+i3KdepXId2cORyt8+QRDtVBn1Ot5/HABIayakHg5lJYXmXl1V5IZlfGu
AyHNZZha2GyMfJeZh1L6vw/ny5Tf+r31ePfQo0VLD/dQBghZh3Jdh7s/TKoo0x4dQKnR36yu/i3lN/rBRsDwvYjYmTJy9b7Akyj9A5tTJXyWUpMykuFqpb5LmY6lW/f9dQmlX/HdrD4A0l+oQWsj/bqU2rBLRzmnybDe6EmK2td6F4aahHb3ob6JMoXFLzNz1YAnmflFSt6rY1Vg1Bk0ZbqrlQRH9pG0+T3qa7qY2k3knZSg8PuUgLvZ//UcyvRd36Z8D39LHdl9Gnsc/Y2MPlIXrU9RuunA0Hymo6qB5ocZuoesMQbGZJnumd8pp5beHzvMtmG/gLV53XCjYXbS3MzQNBUzVWeqjfUozT1+A7w3M/+rK13nx7cRI5vPmu23R3tIrTvK9qnuZEpfouYkvWP9TOvCqgdR54a1XnNbD5+iTAnQDAJ/xvQfrexiSkbkMmrNa2aeE2X6iXPrQCUd6zJ07VYCb42IDwF/qOuW1oGkvsfqNbRPZmgqh+FKDaMeex5Ttx/nbOCmzFw1im1tPnonNaNWmxQ/hjJp8nB9q19D6e91CfBayiA8/wK8KiIuowzo9UfKqHhQnmUbAZtSam4fTckgP331I8+8jHcOaIqSnMEj6lXXU/qgXUfpE302cFGvGvfMvIzS1PaZlPlqu+fOexLl93rvMAW4q6ktCOZSnkXDZRybNTxkGZl4JswHt3AMaX9NmVLmJso8cKvVqNbC9f0Z/f7XvJZtaUYOQGa+gj4GSuraJyPi/wL/1as/bWZeztC0YDPFTynPidso36d+uzcEQ7/l5j7r0GermCzTpjyc0uT/y5SRX6eEmJotkKQiItbJNYcTn+gxHwSsO80G8BiTWvK3Xma26oGoyRURJ1OmdGkGhbMpfVwvzDItx36UvjG/BP41G3MX1vRBmWj6kcCumXldXT+XMkLpnpSBFraiZDjXp2S0g9IE6H5KbdfxmfnurmPvXP/uG3J6jCyqARqm9lhTRC1A2qzRJE+akeoYIv2OqP+AMSiUJE0ptTZ115YN0iNJ0qQxKJQkSZKkFhvPqFCSJEmSpBnCoFCSJEmSWsygUJIkSZJazKBQkiRJklrMoFCSJEmSWuz/AwKKhp38qnguAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1080x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "train_2_df['subject'].value_counts().plot(kind='bar', figsize=(15, 5), fontsize=20, rot=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f79a23312b0>"
      ]
     },
     "execution_count": 288,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4YAAAE7CAYAAACBuJ+PAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGUBJREFUeJzt3X/QpeVdHvDry4LIBlnNZkm62LCZiQRqWYe4jRrBhA5tddJRscaYYZzW2jKjRoxGaGiboiRWbKCTxlZH6o/YKpBWFHXWiTElJEw1oYtTEpQgimu0G3STsBuy0e6SfvvHeTYeXg/s2T3n8O6+z+czc+be89zX88z98seZuXh+VXcHAACA8TptvRcAAADA+lIMAQAARk4xBAAAGDnFEAAAYOQUQwAAgJFTDAEAAEZurmJYVfdUVT/N5x1TuYuqandVHayqA1V1V1XtmHG8peYAAAA4cTXPewyr6muTvGDN5i9O8uYkP9Hd31VV5yV5YJi7JcmmJNcmOZhkZ3cfGI611BwAAACLOX2eUHe/a+22qvq+4Z/vHsYbkmxNcnl33zNkHklyR5Jrkty4ohwAAAALmOuM4cwdq+5N8tIk25IcSfLxJB/r7gunMqcP2/+su19SVWcsM3dCCwcAAOApTujhM1X1/CQvT/Ib3f2ZJDuTnJPkvulcdz+ZyeWgF1TV1hXkAAAAWNCJPpX0ymHfXx6+7xjGfTOyjw3j+SvIAQAAsKC57jGc4ZsyuXz014bvm4fxyIzs4anMsnNPUVVXJ7k6SZ7znOd8+YUXXrg2AgAAMAr333//x7t72zzZ4y6GVfWFSV6Z5J6pJ4MeGsYtM3bZMpVZdu4puvvWJLcmya5du3rPnj2z/wgAAIANrqr+eN7siZwx/PokZ+SvLiNNkkeH8dwZ+aMNdW+SXnIOAACABZ1IMfxHmRS3X5na9mAm7xe8bDpYVWcluSTJQ939eFU9sczcCawdAACANY7r4TNV9Zwkfz/JB7r7cw+GGZ4WenuS7VV1xdQuVyY5M8ltq8gBAACwuON6j2FVvTrJf0tyXXe/dc3c9iQfSlJJbk6yKcl1SQ4kubi7D64i93TcYwgAAIxZVd3f3bvmyR7vpaTfNIy/vHaiu/dV1aVJbklyfSaXm96d5PXTJW7ZOQAAABZzXGcMTyXOGAIAAGN2PGcMT/QF9wAAAGwQiiEAAMDIKYYAAAAjpxgCAACMnGIIAAAwcsf7ugpOcTveuHu9l8ApZO9Nr1rvJQAA8CxwxhAAAGDkFEMAAICRUwwBAABGTjEEAAAYOcUQAABg5BRDAACAkVMMAQAARk4xBAAAGDnFEAAAYOQUQwAAgJFTDAEAAEZOMQQAABg5xRAAAGDkFEMAAICRUwwBAABGTjEEAAAYOcUQAABg5BRDAACAkVMMAQAARm7uYlhVr6uqh6vqM1X1O1X1tWvmt1fV7VX1iap6oqreU1WXzDjOUnMAAAAsZq5iWFVvSvL2JO9Ncl2Sz0uyu6q+cpg/O8n7knxDkh9P8kNJ/laS91XVjqnjLDUHAADA4k4/VqCqzkvyr5Pc1N3/ctj27iQPJ/nOJB9Ick2SFyf59u5+x5C5d5j7N0n+6XC4ZecAAABY0DxnDF+byRnCnzy6obt/P8m2JN8zbPq2JE8k+YWpzAeT/F6SV1fV560oBwAAwILmKYZfnUlJ+4Kq+kBV/WVVfSTJK7v7U1W1NcmFSf53dx9Zs++eJGcnuXjZueP4GwEAAHgG8xTDFyU5kuRXk/yvJP8qyeYk76yqr0iyY8jtm7HvY8N4/gpyAAAALME8xfDsJM9N8uPd/T3dfUsml5eelr8qicmkPK51eBg3ryD311TV1VW1p6r27N+//2n+HAAAAKbNUwyPFrTbj27o7v+Z5DNJLklyaNi8Zca+R7cdWkHur+nuW7t7V3fv2rZt26wIAAAAaxzzqaRJPjGMf7lm+yeT
PC/Jo8P3c2fse7Sd7U3yR0vOAQAAsATznDH83WHcsWb7Fyb50+4+MGQuGd4/OO3lST6V5MFl5+ZYNwAAAHOYpxj+2jB++9ENw4vtz07ywWHTf83klRbfOpV5eZIXJvnvU08XXXYOAACABc1zKenuJO9N8t1V9flJHkry/UmeTPKjQ+Y/JvlnSd5eVecnOZjkDZmc3Xvz1LGWnQMAAGBBxyyG3d1V9Q1JfiTJt2Ty8vkHk3xHd394yByqqlck+fdJvjuTs30fTPKG7v7jqWMtNQcAAMDi5jljmO5+Isnrhs/TZfZl6tLPZysHAADAYua5xxAAAIANTDEEAAAYOcUQAABg5BRDAACAkVMMAQAARk4xBAAAGDnFEAAAYOQUQwAAgJFTDAEAAEZOMQQAABg5xRAAAGDkFEMAAICRUwwBAABGTjEEAAAYOcUQAABg5BRDAACAkVMMAQAARk4xBAAAGDnFEAAAYOQUQwAAgJFTDAEAAEZOMQQAABg5xRAAAGDkFEMAAICRUwwBAABGbq5iWFWXV1U/zedtU7mLqmp3VR2sqgNVdVdV7ZhxvKXmAAAAOHGnz5n7omH8t0keWTP3e0lSVecluXfYdlOSTUmuTfL+qtrZ3QdWkQMAAGAxx1sMf767H3qazA1Jtia5vLvvSZKqeiTJHUmuSXLjinIAAAAsYN57DJ87jH82a7KqzkjymiQPHy1xgzuTHExy1SpyAAAALG7eYvhFSY4kuaqqPlpVh6vqw1X1dcP8ziTnJLlveqfufjLJA0kuqKqtK8gBAACwoOMphmckuS7Jj2dymecXJ/nVqnpJkh1Dbt+MfR8bxvNXkHuKqrq6qvZU1Z79+/c/zZ8CAADAtHnvMdybZHeS7+rujyZJVf1RktszeSDM+4bckRn7Hh7GzcNnmbmn6O5bk9yaJLt27erZfwoAAADT5jpj2N0/2t3/8GgpHLxnGL80yaHh31tm7H5026EV5AAAAFjQvGcMZ3liGJ+b5NHh3+fOyG0bxr1Jesk5AAAAFnTMM4ZVtbWqHpx+kf1g+zD+aZIHM3la6GVr9j0rySVJHurux1eQAwAAYEHzXEr6yUzO3H1LVX3+1PZvHcZfH54WenuS7VV1xVTmyiRnJrkt+dxTRZeWAwAAYHHVfexntFTV9yZ5W5IPZlLKXpTkdUkeTvKy7v5MVW1P8qEkleTmJJsyeYrpgSQXd/fB4VhLzT2dXbt29Z49e+b+DzEWO964e72XwClk702vWu8lAABwgqrq/u7eNU92rnsMu/s/VNWnk3x/JiXt8SQ/m+T67v7MkNlXVZcmuSXJ9ZncJ3h3ktdPl7hl5wAAAFjM3A+f6e6fTvLTx8h8JMkxTzEsOwcAAMCJm/cF9wAAAGxQiiEAAMDIKYYAAAAjpxgCAACMnGIIAAAwcoohAADAyCmGAAAAI6cYAgAAjJxiCAAAMHKKIQAAwMgphgAAACOnGAIAAIycYggAADByiiEAAMDIKYYAAAAjpxgCAACMnGIIAAAwcoohAADAyCmGAAAAI6cYAgAAjJxiCAAAMHKKIQAAwMgphgAAACOnGAIAAIycYggAADByJ1QMq+raquqqesea7RdV1e6qOlhVB6rqrqraMWP/peYAAAA4cacf7w5V9ZIkN87Yfl6Se4evNyXZlOTaJO+vqp3dfWAVOQAAABZzXMWwqk5L8rNJDif5/DXTNyTZmuTy7r5nyD+S5I4k1+SvyuSycwAAACzgeC8l/b4kX5XJmbvPqaozkrwmycNHS9zgziQHk1y1ihwAAACLm7sYVtUFSd6c5KeSvHvN9M4k5yS5b3pjdz+Z5IEkF1TV1hXkAAAAWNBcxXC4hPRnknw8yRtmRHYM474Zc48N4/kryK1d59VVtaeq9uzfv3/GrgAAAKw17xnD703y1Un+eXd/asb85mE8MmPu8FRm2bmn6O5bu3tXd+/atm3bjF0BAABY65jFsKpenOSHk/xSkgeq6gVJjraus4bv/3f4
vmXGIY5uOzR8lpkDAABgQfOcMbw0yVlJvinJx4bP0Xv/vmX4vn34fu6M/Y+WyL1JHl1yDgAAgAXN87qK30zydWu2PT/JO5K8J8ktSX4nyQ8muWw6VFVnJbkkyUPd/XhVPZHJU0WXkptj7QAAABzDMc8Ydvf/6e53TX+SvG+YPjr350luT7K9qq6Y2v3KJGcmuW041pPLzAEAALC443rB/TG8Ocmrk7yzqm5OsinJdUn+JMmPrTAHAADAApZWDLt7X1Vdmsmlpdcn6SR3J3l9dx9cVQ4AAIDFnFAx7O69SWrG9o8kedUc+y81BwAAwImb9z2GAAAAbFCKIQAAwMgphgAAACOnGAIAAIycYggAADByiiEAAMDIKYYAAAAjpxgCAACMnGIIAAAwcoohAADAyCmGAAAAI6cYAgAAjJxiCAAAMHKKIQAAwMgphgAAACOnGAIAAIycYggAADByiiEAAMDIKYYAAAAjpxgCAACMnGIIAAAwcoohAADAyCmGAAAAI6cYAgAAjJxiCAAAMHJzF8OquqCqfr2qDlXVx6rqnVX1xWsyF1XV7qo6WFUHququqtox41hLzQEAAHDiTp8nVFVnJ3lPkk5yQ5LnJPn+JC+tqp3d/RdVdV6Se4ddbkqyKcm1Sd4/ZA4Mx1pqDgAAgMXMVQyTfHeSv5nk4u5+MEmq6kCStyX5+iTvzKQwbk1yeXffM2QeSXJHkmuS3Dgca9k5AAAAFjDvpaR/J8neo6Vw8PvDuKOqzkjymiQPHy1xgzuTHExyVZIsOwcAAMDi5iqG3f3N3f2iNZu/dBj/JMnOJOckuW/Nfk8meSDJBVW1dQU5AAAAFnTcTyWtqr9RVa9Ncn2SP0zyq0l2DNP7Zuzy2DCev4Lc2rVdXVV7qmrP/v37n/6PAAAA4HNO5HUV/znJbUn+Msk3dvenk2we5o7MyB8exs0ryD1Fd9/a3bu6e9e2bdue8Y8AAABg4kSK4Q8neUOSz2byhNAvS3JomNsyI39026EV5AAAAFjQvE8l/Zzu/u0kv11V92ZyD+CNmTxBNEnOnbHL0VN3ezN53cUycwAAACzomGcMq+p5VfWRqnr7mqnfHcaXJHkwk6eFXrZm37OSXJLkoe5+fAU5AAAAFjTPpaSfTPK8JN9YVWdObX/ZMP7h8LTQ25Nsr6orpjJXJjkzk3sSs+wcAAAAizvmpaTd/f+q6keS3JzkvVV1R5IvSPK6TB4E88ND9M1JXp3knVV1c5JNSa7L5HUWPzZ1yGXnAAAAWMC87zG8Jck3J6lMCtv3ZHJ/4cu6+7eGzL4klyb5QCavsrg2yd1Jvqa7D04da6k5AAAAFjP3w2e6+84kdx4j85Ekr5rjWEvNAQAAcOJO5HUVAAAAbCCKIQAAwMgphgAAACOnGAIAAIycYggAADByiiEAAMDIKYYAAAAjpxgCAACMnGIIAAAwcoohAADAyCmGAAAAI6cYAgAAjJxiCAAAMHKKIQAAwMgphgAAACOnGAIAAIycYggAADByiiEAAMDIKYYAAAAjpxgCAACMnGIIAAAwcoohAADAyCmGAAAAI6cYAgAAjJxiCAAAMHJzF8OqurCqdlfVE1V1qKreXVUXr8lcNGQOVtWBqrqrqnbMONZScwAAAJy40+cJVdXWJPckeU6StybZlOS6JL9ZVRd096eq6rwk9w673DRkrk3y/qra2d0HhmMtNQcAAMBi5iqGSb4jyfOTvLa770iSqjqS5M1Jvi3Jf0pyQ5KtSS7v7nuGzCNJ7khyTZIbh2MtOwcAAMAC5r2U9CuG8d1T2+4fxgur6owkr0ny8NESN7gzycEkVyXJsnMAAAAsbt5i+EtJ3pTk8alt24fxk0l2JjknyX3TO3X3k0keSHLBcDnqsnMAAAAsaK5LSbv7F6a/V9XpSb4zSSf5lSQvGqb2zdj9sWE8P8mOJec+sWZdVye5Okle+MIXztgVAACAtY77dRVVdVqSn0jy5Une2t2/k2TzMH1kxi6H
h3HzCnJP0d23dveu7t61bdu2Z/w7AAAAmJj34TNJkqo6M8nPZXL/308meeMwdWgYt8zYbctUZtk5AAAAFjR3MayqzUl2J3llkrd095umph8dxnNn7Hr01N3eTC49XWYOAACABc37HsPTMnki6CuSfFd3/8SayIOZPC30sjX7nZXkkiQPdffjVfXEMnPz/YkAAAA8k3nvMbwmydcmuWFGKTz6tNDbk2yvqiumpq5McmaS21aRAwAAYHHV3c8cmJyl+2gmZxd/IMln10Q+3d2/WFXbk3woSSW5OcmmJNclOZDk4u4+OBxvqbmns2vXrt6zZ88c/wnGZccbd6/3EjiF7L3pVeu9BAAATlBV3d/du+bJznMp6fOTPG/490/NmP/jJL/Y3fuq6tIktyS5PpP7BO9O8vrpErfsHAAAAIs5ZjHs7r2ZnLU7pu7+SJJjnmJYdg4AAIATd9zvMQQAAGBjUQwBAABGTjEEAAAYOcUQAABg5OZ6wT0APKMf3LLeK+BU8oMeLg5wsnHGEAAAYOScMQQA4KR08c9dvN5L4BTy4X/84fVewinNGUMAAICRUwwBAABGTjEEAAAYOcUQAABg5BRDAACAkVMMAQAARk4xBAAAGDnFEAAAYOQUQwAAgJFTDAEAAEZOMQQAABg5xRAAAGDkFEMAAICRUwwBAABGTjEEAAAYOcUQAABg5BRDAACAkTvuYlhVX1JVn32auZdX1fuq6tNV9fGq+rmq2rrqHAAAACfu9HlCVXVakhcneWmSt2RGoayqL0tyd5LHkrwpyblJ3pDkb1fVV3b3kVXkAAAAWMxcxTCTUvbwMTJvTbIpyd/r7keSpKoOJLkpyWuT/JcV5QAAAFjAvJeSfjLJ1w2fD6+drKoXJLkiyf84WuIGPzOMV60iBwAAwOLmKobdfbi739Xd78qkJK711UkqyX1r9tuf5KNJvmpFOQAAABa0rKeS7hjGfTPmHkvyBVX13BXkAAAAWNCyiuHmYZz1QJjDU5ll556iqq6uqj1VtWf//v3HXDQAAADLK4aHhnHLjLktU5ll556iu2/t7l3dvWvbtm3HXDQAAADLK4aPDuO5M+a2JTnY3Y+vIAcAAMCCllUMfytJJ/ma6Y1V9aIkLxjmV5EDAABgQUspht3950l+I8nLqurCqanXDuNtq8gBAACwuHlfcD+Pf5Hk8iS/UVVvy+Qy0DckuT/JHSvMAQAAsIBlXUqa7v5QJkVub5K3JLk6kwL3D7r7yVXlAAAAWMxxnzHs7lc+w9xvJ3nFHMdYag4AAIATt7QzhgAAAJyaFEMAAICRUwwBAABGTjEEAAAYOcUQAABg5BRDAACAkVMMAQAARk4xBAAAGDnFEAAAYOQUQwAAgJFTDAEAAEZOMQQAABg5xRAAAGDkFEMAAICRUwwBAABGTjEEAAAYOcUQAABg5BRDAACAkVMMAQAARk4xBAAAGDnFEAAAYOQUQwAAgJFTDAEAAEZOMQQAABg5xRAAAGDkTvpiWFUXVdXuqjpYVQeq6q6q2rHe6wIAANgoTl/vBTyTqjovyb3D15uSbEpybZL3V9XO7j6wbosDAADYIE7qYpjkhiRbk1ze3fckSVU9kuSOJNckuXH9lgYAALAxnLSXklbVGUlek+Tho6VwcGeSg0muWo91AQAAbDQnbTFMsjPJOUnum97Y3U8meSDJBVW1dT0WBgAAsJGczMVwxzDumzH32DCe/+wsBQAAYOM6me8x3DyMR2bMHV6TSZJU1dVJrh6+frqqHl7R2th4npfk4+u9iJNN/eh6rwBOeX5bZvmhWu8VwKnOb8sM9U/8tsww94m0k7kYHhrGLTPmtqzJJEm6+9Ykt65yUWxMVbWnu3et9zqAjcVvC7AKfltYhZP5UtJHh/HcGXPbhnHvs7MUAACAjetkLoYPZvL00cumN1bVWUkuSfJQdz++HgsDAADYSE7aYjg8ffT2JNur6oqpqSuTnJnktnVZGBuVS5CBVfDbAqyC3xaW
rrp7vdfwtKpqe5IPJakkNyfZlOS6JAeSXNzdB9dxeQAAABvCSV0Mk6SqLkxySyaXlHaS9yZ5fXfvXc91AQAAbBQnfTEEAABgtU7aewxh1arqoqraXVUHq+pAVd1VVTvWe13AxlFVX1JVn13vdQAbl98ZluVkfo8hrExVnZfk3uHrTZncv3ptkvdX1c7uPrBuiwNOaVV1WpIXJ3lpkrfE/4QFlszvDKugGDJWNyTZmuTy7r4nSarqkSR3JLkmyY3rtzTgFHdukofXexHAhuZ3hqVzjyGjU1VnJPl4ko9194VT208ftv9Zd79kvdYHnNqq6vOS/N3h67/L5CnatY5LAjYYvzOsgjOGjNHOJOck+ZXpjd39ZFU9kORrqmprd39iXVYHnNK6+3CSdyVJVb1xnZcDnEKq6uwkZx8jdrC7/yJ+Z1gy1yMzRjuGcd+MuceG8fxnZykAAJ/zA0k+dozPa9ZtdWxozhgyRpuH8ciMucNrMgAAz5afT/KBY2Q+/GwshPFRDBmjQ8O4ZcbcljUZAIBnRXf/QZI/WO91ME4uJWWMHh3Gc2fMbRvGvc/OUgAAYP0phozRg0kOJrlsemNVnZXkkiQPdffj67EwAABYD4oho9PdTya5Pcn2qrpiaurKJGcmuW1dFgYAAOvEewwZparanuRDSSrJzUk2JbkuyYFM3gV0cB2XB2wQVXVPkld4vxiwKn5nWBYPn2GUuntfVV2a5JYk1yfpJHcneb1SCADA2DhjCAAAMHLuMQQAABg5xRAAAGDkFEMAAICRUwwBAABGTjEEAAAYOcUQAABg5BRDAACAkVMMAQAARu7/A5Wc/62Er9IWAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1080x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "train_df['sentiment_value'].value_counts().plot(kind='bar', figsize=(15, 5), fontsize=20, rot=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 289,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "烧机油         69\n",
       "好           40\n",
       "高           37\n",
       "不错          37\n",
       "异响          29\n",
       "大           28\n",
       "差           26\n",
       "低           22\n",
       "省油          20\n",
       "便宜          18\n",
       "强           18\n",
       "优惠          18\n",
       "噪音大         16\n",
       "噪音          16\n",
       "响           14\n",
       "够用          13\n",
       "小           12\n",
       "贵           12\n",
       "好看          12\n",
       "还可以         12\n",
       "满意          12\n",
       "舒服          12\n",
       "喜欢          12\n",
       "空间大         11\n",
       "没问题         11\n",
       "差不多         10\n",
       "正常           9\n",
       "不行           9\n",
       "费油           8\n",
       "可以           8\n",
       "一般           7\n",
       "下降           6\n",
       "最好           6\n",
       "胜出           6\n",
       "不高           6\n",
       "完败           6\n",
       "好点           6\n",
       "很好           6\n",
       "更好           6\n",
       "硬伤           6\n",
       "给力           6\n",
       "手机导航         5\n",
       "没有           5\n",
       "非常好          5\n",
       "爆震           5\n",
       "不烧机油         5\n",
       "鸡肋           5\n",
       "还行           5\n",
       "太差           5\n",
       "软            5\n",
       "            ..\n",
       "油耗上升         1\n",
       "噪音较大         1\n",
       "啥都好          1\n",
       "不算问题         1\n",
       "控制不了         1\n",
       "不费劲          1\n",
       "比森林人强不少      1\n",
       "油耗没那么高       1\n",
       "内饰太L0W       1\n",
       "不舒服、异响       1\n",
       "还是现款好看       1\n",
       "安全感极高        1\n",
       "不逊于          1\n",
       "不怎么好看        1\n",
       "差点追尾         1\n",
       "发动机噪音确实大     1\n",
       "没动力          1\n",
       "空调不制冷        1\n",
       "不擅长          1\n",
       "追不上          1\n",
       "正常价格         1\n",
       "更强           1\n",
       "好开           1\n",
       "还好           1\n",
       "负担大          1\n",
       "在意           1\n",
       "更高           1\n",
       "下机油          1\n",
       "差别没那么大       1\n",
       "舒服的很         1\n",
       "废油           1\n",
       "不细致          1\n",
       "掉了链子         1\n",
       "背光           1\n",
       "上了一个档次       1\n",
       "换油           1\n",
       "距离短          1\n",
       "超前           1\n",
       "没宽敞          1\n",
       "灵巧           1\n",
       "差的不是一点       1\n",
       "舒服安静         1\n",
       "坑爹           1\n",
       "空间/后座又宽，     1\n",
       "用的很好         1\n",
       "价格差不多        1\n",
       "摩擦音          1\n",
       "性价比不算很高      1\n",
       "走烂路很颠        1\n",
       "取消           1\n",
       "Name: sentiment_word, Length: 1219, dtype: int64"
      ]
     },
     "execution_count": 289,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df['sentiment_word'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.1 切词保存csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0fe2bdd38a4246c8ac8e493de14a2abc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=2753), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache /tmp/jieba.cache\n",
      "Loading model cost 1.021 seconds.\n",
      "Prefix dict has been built succesfully.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f37f17acbc1a43da8e938b5b6eb1d982",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=12572), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# 只在第一次进入cell时运行\n",
    "\n",
    "def cut(cut_df):\n",
    "    chars = []\n",
    "    words = []\n",
    "    for s in tqdm(cut_df['content']):\n",
    "        chars.append(' '.join(list(s)).strip())\n",
    "        words.append(' '.join(jieba.cut(s, cut_all=False)).strip())\n",
    "    os.makedirs('../../data/csvs', exist_ok=True)\n",
    "    cut_df['word'] = words\n",
    "    cut_df['char'] = chars\n",
    "    return cut_df\n",
    "\n",
    "test_cut_df = cut(test_df)\n",
    "train_cut_df = cut(train_df)\n",
    "\n",
    "test_cut_df.to_csv('../../data/csvs/test_public.csv', index=None)\n",
    "train_cut_df.to_csv('../../data/csvs/train.csv', index=None)\n",
    "\n",
    "test_cut_df = pd.read_csv('../../data/csvs/test_public.csv')\n",
    "train_cut_df = pd.read_csv('../../data/csvs/train.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "# elmo 训练语料\n",
    "os.makedirs('../../data/elmo_corpus/', exist_ok=True)\n",
    "with open('../../data/elmo_corpus/word.txt', 'w') as f1, open('../../data/elmo_corpus/char.txt', 'w') as f2:\n",
    "    for i in pd.concat((train_cut_df['word'], test_cut_df['word'])):\n",
    "        f1.write(i+'\\n')\n",
    "    for i in pd.concat((train_cut_df['char'], test_cut_df['char'])):\n",
    "        f2.write(i+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data3/jzzhou/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /data3/jzzhou/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/tf_should_use.py:118: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.\n",
      "Instructions for updating:\n",
      "Use `tf.global_variables_initializer` instead.\n",
      "case 2\n",
      "(1, 6, 1, 64)\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "#case 2\n",
    "input = tf.Variable(tf.random_normal([1,7,5,1]))\n",
    "filter = tf.Variable(tf.random_normal([2,5,1,64]))\n",
    " \n",
    "op2 = tf.nn.conv2d(input, filter, strides=[1, 1, 1, 1], padding='VALID') # 1,3,3,1\n",
    "\n",
    "\n",
    "init = tf.initialize_all_variables()\n",
    "with tf.Session() as sess:\n",
    "    sess.run(init)\n",
    "    print(\"case 2\")\n",
    "    print(sess.run(op2).shape)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.2 处理Label-十种四分类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7ada154f45204cc38aa81f8da5090883",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=12572), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "sub_list = ['subject_价格', 'subject_内饰', 'subject_动力', 'subject_外观', 'subject_安全性',\n",
    "            'subject_油耗', 'subject_空间', 'subject_舒适性', 'subject_操控', 'subject_配置']\n",
    "pickle.dump(sub_list, open('../../data/sub_list.pkl', 'wb'))\n",
    "columns = ['content_id', 'content', 'word', 'char', 'sentiment_word']\n",
    "columns.extend(sub_list)\n",
    "train_df_final = pd.DataFrame(columns=columns)\n",
    "\n",
    "for i in tqdm(range(len(train_cut_df))):\n",
    "    c_id = train_cut_df.iloc[i]['content_id']\n",
    "    subject_str = 'subject_' + train_cut_df.iloc[i]['subject']\n",
    "    sent_val = train_cut_df.iloc[i]['sentiment_value']\n",
    "    \n",
    "    if c_id in set(train_df_final['content_id'].values):\n",
    "        train_df_final[subject_str][train_df_final['content_id'] == c_id] = sent_val + 2\n",
    "\n",
    "    else:\n",
    "        sub_dict = {k:0 for k in sub_list}\n",
    "        sub_dict[subject_str] = sent_val + 2\n",
    "        content_dict = {\n",
    "            'content_id': c_id,\n",
    "            'content': train_cut_df.iloc[i]['content'],\n",
    "            'word' : train_cut_df.iloc[i]['word'],\n",
    "            'char' : train_cut_df.iloc[i]['char'],\n",
    "            'sentiment_word' : train_cut_df.iloc[i]['sentiment_word'],\n",
    "        }\n",
    "        content_dict.update(sub_dict)\n",
    "        train_df_final.loc[train_df_final.shape[0]] = content_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df.shape\n",
    "jp_df['content_id'] = train_df['content_id']\n",
    "en_df['content_id'] = train_df['content_id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "# tmp_df = en_df.loc[list(train_df_final['content_id'].values)]\n",
    "# tmp_df\n",
    "en_df.drop_duplicates(subset='content_id', inplace=True)\n",
    "jp_df.drop_duplicates(subset='content_id', inplace=True)\n",
    "jp_df.to_csv('../../data/csvs/round2zh2jp.csv')\n",
    "en_df.to_csv('../../data/csvs/round2zh2en.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10654, 5)"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "en_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10654, 15)"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df_final.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1918\n",
      "12572\n",
      "10654\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1472"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(len(train_df[train_df.duplicated(['content_id', 'content'])]))\n",
    "print(len(train_df))\n",
    "print(len(train_df_final))\n",
    "\n",
    "\n",
    "multi_item_df = train_df_final[train_df_final[train_df_final[sub_list] > 0].count(axis=1) > 1]\n",
    "len(multi_item_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.8819973718791064,\n",
       " 0.019110193354608597,\n",
       " 0.07966960765909517,\n",
       " 0.019222827107189786)"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 统计类别比\n",
    "labels = train_df_final.iloc[:, 5:].values\n",
    "labels_dict = {}\n",
    "\n",
    "for idx,l in enumerate(labels):\n",
    "    n_l = np.sum(l>0)\n",
    "    labels_dict[n_l] = labels_dict.get(n_l, 0) + 1\n",
    "\n",
    "\n",
    "n_c0 = np.sum(labels == 0)\n",
    "n_c1 = np.sum(labels == 1)\n",
    "n_c2 = np.sum(labels == 2)\n",
    "n_c3 = np.sum(labels == 3)\n",
    "n_total = n_c0 + n_c1 + n_c2 + n_c3\n",
    "w_c0 = n_c0 / n_total\n",
    "w_c1 = n_c1 / n_total\n",
    "w_c2 = n_c2 / n_total\n",
    "w_c3 = n_c3 / n_total\n",
    "w_c0, w_c1, w_c2, w_c3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df_final.to_csv('../../data/csvs/train_multi.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 363,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    7208\n",
       "2     793\n",
       "3     151\n",
       "1     138\n",
       "Name: subject_油耗, dtype: int64"
      ]
     },
     "execution_count": 363,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXsAAAD6CAYAAABApefCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADR9JREFUeJzt3X+oX/V9x/Hny12pbq63+XFHxYHZELfSHxnbtYjGaW6XMLvo6kahdDI3/wgURpFKx/rHwP0x0P1Rpky6BloQEQbdKGsTWpsmmVPRdjeMRKgrdcXRjCLfqyyp2GasvvfHPWPea5vv517v9Zh8ng/4kvN9n8O9n/MlPjn3JMekqpAkXdguGnsBkqTNZ+wlqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI6MDP2Av7P9u3ba8eOHWMvQ5LOK8ePH1+qqrlpx71lYr9jxw4WFxfHXoYknVeS/EfLcd7GkaQOGHtJ6sDU2Cf5uST/mOTJJH+VZHuSx5M8k+Te4ZimmSRpHC1X9n8APF1V1wPvBj4LHAJ2AjcnuRq4q3EmSRpBS+z/C7gsyc8AlwLXAYer6lXgMWA3sNA4kySNoCX2XwR+G/h34FngDHB62HcG2Apsa5ytkGR/ksUki5PJZL3nIEmaoiX2nwI+U1U7WA721cDssG8WWBpeLbMVqupAVc1X1fzc3NS/JipJWqeW2P888KNh+yzwFLA3yUXAjcAx4EjjTJI0gpbYPwh8LMlTLN+zvw34IHASOFRVzwEPNM4kSSOY+gRtVT0PXL9qfMOqY5ZaZm+2HX92aMxv3+z5e39n7CVIusD5UJUkdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHpsY+yU1Jnhhe30tyR5KDSU4keTjLLmmZvRknJEl6vamxr6p/qqpdVbWL5X88fBY4VVU7gS3AHuD2xpkkaQTNt3GS/CxwFXAtcHgYHwV2AwuNM0nSCNZyz34PcATYBpweZmeArWuYrZBkf5LFJIuTyWTtq5ckNVlL7G8BDgJLLN/KYfh1aQ2zFarqQFXNV9X83Nzc2lcvSWrSFPvhD1dvYvl2zBFg77BrATi2hpkkaQStV/bXAN+qqh8BjwBXJDkJvMRy1FtnkqQRzLQcVFXfBG4dts8C+1Yd0jqTJI3Ah6okqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI6YOwlqQPGXpI60PoPjv9pkqeTfCXJLyR5PMkzSe4d9m9vmUmSxjE19kl+GXh3VV0LfAX4a+AQsBO4OcnVwF2NM0nSCFqu7D8AbEnyz8ANwC8Bh6vqVeAxYDew0DiTJI2gJfZzwKSqfhP4ReD9wOlh3xlgK7CtcbZCkv1JFpMsTiaTdZ+EJOncWmJ/Bvj2sP1d4Hlgdng/CywNr5bZClV1oKrmq2p+bm5uPeuXJDVoif1xYH7Yvorl8O9NchFwI3AMONI4kySNYGrsq+op4MUk/8Jy6P8Q+CBwEjhUVc8BDzTOJEkjmGk5qKo+tmp0w6r9Sy0zSdI4fKhKkjpg7CWpA8Zekjpg7CWpA8Zekjpg7CWpA8Zekjpg7CWpA8Zekjpg7CWpA8Zekjpg7CWpA8Zekjpg7CWpA8Zekjpg7CWpA8Zekjpg7CWpA8ZekjowNfZJrklyKskTw2tnkoNJTiR5OMsuaZm9GSckSXq9liv7LcBnqmpXVe0CrgFOVdXOYd8e4PbGmSRpBDMNx2wBfj/J7wLfA/4b+Pth31FgN3Al8A8Ns69tzLIlSWvRcmX/HPDnVfV+4HLg94DTw74zwFZgW+NshST7kywmWZxMJus+CUnSubXE/nng
66/ZfhWYHd7PAkvDq2W2QlUdqKr5qpqfm5tbx/IlSS1aYv8J4CNJLgLeA9wN7B32LQDHgCONM0nSCFpi/zfAHwPfAL4IfA64IslJ4CWWo/5I40ySNIKpf0BbVd8Hblo13rfq/dnGmSRpBD5UJUkdMPaS1AFjL0kdMPaS1AFjL0kdMPaS1AFjL0kdMPaS1AFjL0kdMPaS1AFjL0kdMPaS1AFjL0kdMPaS1AFjL0kdMPaS1AFjL0kdMPaS1IHm2Cf5RJKvJ9me5PEkzyS5d9jXNJMkjaMp9kmuBO4Y3t4FHAJ2AjcnuXoNM0nSCFqv7O8HPjVsLwCHq+pV4DFg9xpmkqQRTI19ko8CJ4BvDaNtwOlh+wywdQ2z1V97f5LFJIuTyWS95yBJmqLlyn4f8AHg74DfALYDs8O+WWBpeLXMVqiqA1U1X1Xzc3Nz6z0HSdIUU2NfVR+tql3AR4DjwIPA3iQXATcCx4AjjTNJ0gjW81cvHwA+CJwEDlXVc2uYSZJGMNN6YFU9D/zW8PaGVfuWWmaSpHH4UJUkdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHjL0kdcDYS1IHpsY+yUySLyR5Msnnk1yS5GCSE0kezrKm2ZtxQpKk12u5sv8QcKKqrgcuB/4EOFVVO4EtwB7g9saZJGkELbH/KvDpJDPAO4BfBw4P+44Cu4GFxpkkaQRTY19VL1fVK8CTwAvANuD0sPsMsHUNM0nSCFru2W9L8jbgOpZvx7wHmB12zwJLw6tltvpr70+ymGRxMpm8kfOQJJ1Dy22cu4EPV9WPgVeAvwT2DvsWgGPAkcbZClV1oKrmq2p+bm5u3SchSTq3ltg/CNyZ5CngReBzwBVJTgIvsRz1RxpnkqQRzEw7oKr+k+Ur89fat+r92caZJGkEPlQlSR0w9pLUAWMvSR0w9pLUAWMvSR0w9pLUAWMvSR0w9pLUAWMvSR0w9pLUAWMvSR0w9pLUAWMvSR0w9pLUAWMvSR0w9pLUAWMvSR0w9pLUAWMvSR1oin2Sh5I8neRLSS5LcjDJiSQPZ9klLbPNPhlJ0k82NfZJdgEzVXUt8HbgTuBUVe0EtgB7gNsbZ5KkEbRc2b8A3P+a4+8BDg/vjwK7gYXGmSRpBFNjX1XfqapvJrkNeBX4V+D0sPsMsBXY1jhbIcn+JItJFieTyRs6EUnSTzfTclCSW4GPA7cAfwvMDrtmgSXgssbZClV1ADgAMD8/X+s6A7157pmdfsxbwT2npx8jdablnv07gU8C+6rqB8ARYO+wewE4toaZJGkELffs7wAuBx5N8gRwMXBFkpPASyxH/ZHGmSRpBFNv41TVfcB9q8afXfX+LLCvYSZJGoEPVUlSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHWgKfZJLk7y5WH7kiQHk5xI8nCWNc0291QkST/N1NgnuRQ4DuwZRrcDp6pqJ7BlmLfOJEkjmBr7qvphVb0PODWMFoDDw/ZRYPcaZpKkEaznnv024PSwfQbYuobZCkn2J1lMsjiZTNaxFElSi/XEfgmYHbZnh/etsxWq6kBVzVfV/Nzc3DqWIklqsZ7YHwH2DtsLwLE1zCRJI1hP7B8BrkhyEniJ5ai3ziRJI5hpPbCqrhp+PQvsW7W7dSZJGoEPVUlSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSB4y9JHWg+X+EJmljvfeh9469hKmeueOZsZfQ5NlffdfYS2jyrn97drTv7ZW9JHXA2EtSB4y9JHXA2EtSB4y9JHXA2EtSBzYt9kkuSXIwyYkkDyfJZn0vSdK5beaV/e3AqaraCWwB9mzi95IkncNmxn4B
ODxsHwV2b+L3kiSdw2Y+QbsNOD1snwF+ZfUBSfYD+4e3Lyf59iauZ6NsB5Y28gvmvo38auedDf88+Ytu7xhu/O/NP+r2s4TN+L25OXezr2w5aDNjvwTMDtuz/IQPraoOAAc2cQ0bLsliVc2PvY4LhZ/nxvGz3FgX2ue5mbdxjgB7h+0F4Ngmfi9J0jlsZuwfAa5IchJ4ieX4S5JGsGm3carqLLBvs77+iM6r207nAT/PjeNnubEuqM8zVTX2GiRJm8wnaCWpA8a+kU8Eb7wkDyV5OsmXkvgP6axTkpkkX0jyZJLPj72eC0WSi5N8eex1bBRj384ngjdQkl3ATFVdC7yd//+bW1q7DwEnqup64PIkvzb2gs53SS4FjnMB/Xdu7Nv5RPDGegG4f9j29+Eb81Xg08NPR+9g+SFGvQFV9cOqeh9wauy1bBR/dG439Ylgtauq7wAkuQ14FfjauCs6f1XVywBJvgF8v6q+O/KS9BbkFVW7qU8Ea22S3Ap8HLilqv5n7PWcr5JsS/I24DpgSxJ/6tTrGPt2PhG8gZK8E/gksK+qfjD2es5zdwMfrqofA68Al468Hr0FGft2PhG8se4ALgceTfJEkjvHXtB57EHgziRPAS8Cj468Hr0F+VCVJHXAK3tJ6oCxl6QOGHtJ6oCxl6QOGHtJ6oCxl6QOGHtJ6sD/AoqP4Z/c8tZaAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 1.3 处理Label-组合单分类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 370,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>content_id</th>\n",
       "      <th>content</th>\n",
       "      <th>subject</th>\n",
       "      <th>sentiment_value</th>\n",
       "      <th>sentiment_word</th>\n",
       "      <th>word</th>\n",
       "      <th>char</th>\n",
       "      <th>class</th>\n",
       "      <th>c_numerical</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>vUXizsqexyZVRdFH</td>\n",
       "      <td>因为森林人即将换代，这套系统没必要装在一款即将换代的车型上，因为肯定会影响价格。</td>\n",
       "      <td>价格</td>\n",
       "      <td>0</td>\n",
       "      <td>影响</td>\n",
       "      <td>因为 森林 人 即将 换代 ， 这套 系统 没 必要 装在 一款 即将 换代 的 车型 上 ...</td>\n",
       "      <td>因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 ...</td>\n",
       "      <td>价格-1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4QroPd9hNfnCHVt7</td>\n",
       "      <td>四驱价格貌似挺高的，高的可以看齐XC60了，看实车前脸有点违和感。不过大众的车应该不会差。</td>\n",
       "      <td>价格</td>\n",
       "      <td>-1</td>\n",
       "      <td>高</td>\n",
       "      <td>四驱 价格 貌似 挺 高 的 ， 高 的 可以 看齐 XC60 了 ， 看实车 前 脸 有点...</td>\n",
       "      <td>四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 ...</td>\n",
       "      <td>价格-0</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>QmqJ2AvM5GplaRyz</td>\n",
       "      <td>斯柯达要说质量，似乎比大众要好一点，价格也低一些，用料完全一样。我听说过野帝，但没听说过你说...</td>\n",
       "      <td>价格</td>\n",
       "      <td>1</td>\n",
       "      <td>低</td>\n",
       "      <td>斯柯达 要说 质量 ， 似乎 比 大众 要 好 一点 ， 价格 也 低 一些 ， 用料 完全...</td>\n",
       "      <td>斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 ...</td>\n",
       "      <td>价格-2</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>KMT1gFJiU4NWrVDn</td>\n",
       "      <td>这玩意都是给有钱任性又不懂车的土豪用的，这价格换一次我妹夫EP020可以换三锅了</td>\n",
       "      <td>价格</td>\n",
       "      <td>-1</td>\n",
       "      <td>有钱任性</td>\n",
       "      <td>这 玩意 都 是 给 有钱 任性 又 不 懂车 的 土豪 用 的 ， 这 价格 换 一次 我...</td>\n",
       "      <td>这 玩 意 都 是 给 有 钱 任 性 又 不 懂 车 的 土 豪 用 的 ， 这 价 格 ...</td>\n",
       "      <td>价格-0</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>nVIlGd5yMmc37t1o</td>\n",
       "      <td>17价格忒高，估计也就是14-15左右。</td>\n",
       "      <td>价格</td>\n",
       "      <td>-1</td>\n",
       "      <td>高</td>\n",
       "      <td>17 价格 忒 高 ， 估计 也 就是 14 - 15 左右 。</td>\n",
       "      <td>1 7 价 格 忒 高 ， 估 计 也 就 是 1 4 - 1 5 左 右 。</td>\n",
       "      <td>价格-0</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         content_id                                            content  \\\n",
       "0  vUXizsqexyZVRdFH           因为森林人即将换代，这套系统没必要装在一款即将换代的车型上，因为肯定会影响价格。   \n",
       "1  4QroPd9hNfnCHVt7      四驱价格貌似挺高的，高的可以看齐XC60了，看实车前脸有点违和感。不过大众的车应该不会差。   \n",
       "2  QmqJ2AvM5GplaRyz  斯柯达要说质量，似乎比大众要好一点，价格也低一些，用料完全一样。我听说过野帝，但没听说过你说...   \n",
       "3  KMT1gFJiU4NWrVDn           这玩意都是给有钱任性又不懂车的土豪用的，这价格换一次我妹夫EP020可以换三锅了   \n",
       "4  nVIlGd5yMmc37t1o                            17价格忒高，估计也就是14-15左右。      \n",
       "\n",
       "  subject  sentiment_value sentiment_word  \\\n",
       "0      价格                0             影响   \n",
       "1      价格               -1              高   \n",
       "2      价格                1              低   \n",
       "3      价格               -1           有钱任性   \n",
       "4      价格               -1              高   \n",
       "\n",
       "                                                word  \\\n",
       "0  因为 森林 人 即将 换代 ， 这套 系统 没 必要 装在 一款 即将 换代 的 车型 上 ...   \n",
       "1  四驱 价格 貌似 挺 高 的 ， 高 的 可以 看齐 XC60 了 ， 看实车 前 脸 有点...   \n",
       "2  斯柯达 要说 质量 ， 似乎 比 大众 要 好 一点 ， 价格 也 低 一些 ， 用料 完全...   \n",
       "3  这 玩意 都 是 给 有钱 任性 又 不 懂车 的 土豪 用 的 ， 这 价格 换 一次 我...   \n",
       "4                   17 价格 忒 高 ， 估计 也 就是 14 - 15 左右 。   \n",
       "\n",
       "                                                char class  c_numerical  \n",
       "0  因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 ...  价格-1            1  \n",
       "1  四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 ...  价格-0           18  \n",
       "2  斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 ...  价格-2           24  \n",
       "3  这 玩 意 都 是 给 有 钱 任 性 又 不 懂 车 的 土 豪 用 的 ， 这 价 格 ...  价格-0           18  \n",
       "4            1 7 价 格 忒 高 ， 估 计 也 就 是 1 4 - 1 5 左 右 。  价格-0           18  "
      ]
     },
     "execution_count": 370,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def generate_label():\n",
    "    \"\"\"Build the combined subject+sentiment single-label target.\n",
    "\n",
    "    Works on a copy of the global ``train_cut_df`` and adds two columns:\n",
    "    ``class`` ('<subject>-<sentiment_value + 1>') and ``c_numerical`` (the integer\n",
    "    id of that class, assigned in ``value_counts()`` order of ``class``).\n",
    "\n",
    "    Returns:\n",
    "        (label_stoi, df): the class-string -> id mapping and the augmented frame.\n",
    "\n",
    "    NOTE: section 1.4 defines another ``generate_label``; whichever cell ran last\n",
    "    determines what the name refers to.\n",
    "    \"\"\"\n",
    "    df = train_cut_df.copy()\n",
    "    # Vectorised equivalent of '{}-{}'.format(subject, sentiment_value + 1) per row.\n",
    "    df['class'] = df['subject'].astype(str) + '-' + (df['sentiment_value'] + 1).astype(str)\n",
    "    # Ids follow the order of value_counts().index.\n",
    "    label_stoi = {c: i for i, c in enumerate(df['class'].value_counts().index)}\n",
    "    df['c_numerical'] = df['class'].map(label_stoi)\n",
    "    return label_stoi, df\n",
    "\n",
    "\n",
    "label_stoi, df = generate_label()\n",
    "\n",
    "# Make sure the output directories exist before writing (this also creates ../../data).\n",
    "os.makedirs('../../data/csvs', exist_ok=True)\n",
    "# Context manager so the pickle file is flushed and closed deterministically.\n",
    "with open('../../data/single_label_stoi.pkl', 'wb') as f:\n",
    "    pickle.dump(label_stoi, f)\n",
    "df.to_csv('../../data/csvs/train_single.csv')\n",
    "df[:5]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.4 处理Label-先分主题，再分情感"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>content_id</th>\n",
       "      <th>content</th>\n",
       "      <th>subject</th>\n",
       "      <th>sentiment_value</th>\n",
       "      <th>sentiment_word</th>\n",
       "      <th>word</th>\n",
       "      <th>char</th>\n",
       "      <th>sub_numerical</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>13149</td>\n",
       "      <td>因为森林人即将换代，这套系统没必要装在一款即将换代的车型上，因为肯定会影响价格。</td>\n",
       "      <td>价格</td>\n",
       "      <td>1</td>\n",
       "      <td>影响</td>\n",
       "      <td>因为 森林 人 即将 换代 ， 这套 系统 没 必要 装在 一款 即将 换代 的 车型 上 ...</td>\n",
       "      <td>因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 ...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2288</td>\n",
       "      <td>四驱价格貌似挺高的，高的可以看齐XC60了，看实车前脸有点违和感。不过大众的车应该不会差。</td>\n",
       "      <td>价格</td>\n",
       "      <td>0</td>\n",
       "      <td>高</td>\n",
       "      <td>四驱 价格 貌似 挺 高 的 ， 高 的 可以 看齐 XC60 了 ， 看实车 前 脸 有点...</td>\n",
       "      <td>四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 ...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1652</td>\n",
       "      <td>斯柯达要说质量，似乎比大众要好一点，价格也低一些，用料完全一样。我听说过野帝，但没听说过你说...</td>\n",
       "      <td>价格</td>\n",
       "      <td>2</td>\n",
       "      <td>低</td>\n",
       "      <td>斯柯达 要说 质量 ， 似乎 比 大众 要 好 一点 ， 价格 也 低 一些 ， 用料 完全...</td>\n",
       "      <td>斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 ...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8865</td>\n",
       "      <td>这玩意都是给有钱任性又不懂车的土豪用的，这价格换一次我妹夫EP020可以换三锅了</td>\n",
       "      <td>价格</td>\n",
       "      <td>0</td>\n",
       "      <td>有钱任性</td>\n",
       "      <td>这 玩意 都 是 给 有钱 任性 又 不 懂车 的 土豪 用 的 ， 这 价格 换 一次 我...</td>\n",
       "      <td>这 玩 意 都 是 给 有 钱 任 性 又 不 懂 车 的 土 豪 用 的 ， 这 价 格 ...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11784</td>\n",
       "      <td>17价格忒高，估计也就是14-15左右。</td>\n",
       "      <td>价格</td>\n",
       "      <td>0</td>\n",
       "      <td>高</td>\n",
       "      <td>17 价格 忒 高 ， 估计 也 就是 14 - 15 左右 。</td>\n",
       "      <td>1 7 价 格 忒 高 ， 估 计 也 就 是 1 4 - 1 5 左 右 。</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   content_id                                            content subject  \\\n",
       "0       13149           因为森林人即将换代，这套系统没必要装在一款即将换代的车型上，因为肯定会影响价格。      价格   \n",
       "1        2288      四驱价格貌似挺高的，高的可以看齐XC60了，看实车前脸有点违和感。不过大众的车应该不会差。      价格   \n",
       "2        1652  斯柯达要说质量，似乎比大众要好一点，价格也低一些，用料完全一样。我听说过野帝，但没听说过你说...      价格   \n",
       "3        8865           这玩意都是给有钱任性又不懂车的土豪用的，这价格换一次我妹夫EP020可以换三锅了      价格   \n",
       "4       11784                            17价格忒高，估计也就是14-15左右。         价格   \n",
       "\n",
       "   sentiment_value sentiment_word  \\\n",
       "0                1             影响   \n",
       "1                0              高   \n",
       "2                2              低   \n",
       "3                0           有钱任性   \n",
       "4                0              高   \n",
       "\n",
       "                                                word  \\\n",
       "0  因为 森林 人 即将 换代 ， 这套 系统 没 必要 装在 一款 即将 换代 的 车型 上 ...   \n",
       "1  四驱 价格 貌似 挺 高 的 ， 高 的 可以 看齐 XC60 了 ， 看实车 前 脸 有点...   \n",
       "2  斯柯达 要说 质量 ， 似乎 比 大众 要 好 一点 ， 价格 也 低 一些 ， 用料 完全...   \n",
       "3  这 玩意 都 是 给 有钱 任性 又 不 懂车 的 土豪 用 的 ， 这 价格 换 一次 我...   \n",
       "4                   17 价格 忒 高 ， 估计 也 就是 14 - 15 左右 。   \n",
       "\n",
       "                                                char  sub_numerical  \n",
       "0  因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 ...              1  \n",
       "1  四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 ...              1  \n",
       "2  斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 ...              1  \n",
       "3  这 玩 意 都 是 给 有 钱 任 性 又 不 懂 车 的 土 豪 用 的 ， 这 价 格 ...              1  \n",
       "4            1 7 价 格 忒 高 ， 估 计 也 就 是 1 4 - 1 5 左 右 。              1  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def generate_label():\n",
    "    \"\"\"Build the two-stage targets: subject id first, sentiment second.\n",
    "\n",
    "    Works on a copy of the global ``train_cut_df``. Adds ``sub_numerical`` (the integer\n",
    "    id of ``subject``, assigned in ``value_counts()`` order of ``subject``) and replaces\n",
    "    ``sentiment_value`` with ``sentiment_value + 1``.\n",
    "\n",
    "    Returns:\n",
    "        (label_stoi, df): the subject -> id mapping and the augmented frame.\n",
    "\n",
    "    NOTE: this redefines the ``generate_label`` from section 1.3.\n",
    "    \"\"\"\n",
    "    df = train_cut_df.copy()\n",
    "    # Ids follow the order of value_counts().index.\n",
    "    label_stoi = {c: i for i, c in enumerate(df['subject'].value_counts().index)}\n",
    "    # Column-wise lookup / arithmetic instead of a row-wise apply.\n",
    "    df['sub_numerical'] = df['subject'].map(label_stoi)\n",
    "    df['sentiment_value'] = df['sentiment_value'] + 1\n",
    "    return label_stoi, df\n",
    "\n",
    "\n",
    "label_stoi, df = generate_label()\n",
    "\n",
    "os.makedirs('../../data/csvs', exist_ok=True)\n",
    "# Context manager so the pickle file is flushed and closed deterministically.\n",
    "with open('../../data/subject_label_stoi.pkl', 'wb') as f:\n",
    "    pickle.dump(label_stoi, f)\n",
    "df.to_csv('../../data/csvs/train_single_label.csv')\n",
    "df[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['subject', 'sentiment_value', 'sentiment_word', 'word', 'char',\n",
       "       'sub_numerical'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column labels from position 2 onwards; this expression is the cell's displayed value.\n",
    "# NOTE(review): an unfinished `for l in df[2:]:` header followed here. It had no body,\n",
    "# so the cell could not be parsed; re-add the loop with a body if it is still needed.\n",
    "df.columns[2:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 1.5 多标签分类，主题情感一起分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>content_id</th>\n",
       "      <th>word</th>\n",
       "      <th>char</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>vUXizsqexyZVRdFH</td>\n",
       "      <td>因为 森林 人 即将 换代 ， 这套 系统 没 必要 装在 一款 即将 换代 的 车型 上 ...</td>\n",
       "      <td>因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 ...</td>\n",
       "      <td>价格_0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4QroPd9hNfnCHVt7</td>\n",
       "      <td>四驱 价格 貌似 挺 高 的 ， 高 的 可以 看齐 XC60 了 ， 看实车 前 脸 有点...</td>\n",
       "      <td>四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 ...</td>\n",
       "      <td>价格_-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>QmqJ2AvM5GplaRyz</td>\n",
       "      <td>斯柯达 要说 质量 ， 似乎 比 大众 要 好 一点 ， 价格 也 低 一些 ， 用料 完全...</td>\n",
       "      <td>斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 ...</td>\n",
       "      <td>价格_1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         content_id                                               word  \\\n",
       "0  vUXizsqexyZVRdFH  因为 森林 人 即将 换代 ， 这套 系统 没 必要 装在 一款 即将 换代 的 车型 上 ...   \n",
       "1  4QroPd9hNfnCHVt7  四驱 价格 貌似 挺 高 的 ， 高 的 可以 看齐 XC60 了 ， 看实车 前 脸 有点...   \n",
       "2  QmqJ2AvM5GplaRyz  斯柯达 要说 质量 ， 似乎 比 大众 要 好 一点 ， 价格 也 低 一些 ， 用料 完全...   \n",
       "\n",
       "                                                char  class  \n",
       "0  因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 ...   价格_0  \n",
       "1  四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 ...  价格_-1  \n",
       "2  斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 ...   价格_1  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXsAAAEfCAYAAACgW2ZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xu4XFV9//H3JwQIoKRJOEAaBRTKRRCUBkptuCSQeAsoYIFWLFcDlkIVRKkVjC1W0Vqr4A+lVUQaLEpKC5F7Ei7RCg0IAbmJLdRjkScIEkRAhO/vj7WG7EzmnLP3PmfOmbA/r+eZZ2b2rFmzZu+1v7P2WmvvUURgZmavbOPGugBmZtZ9DvZmZg3gYG9m1gAO9mZmDeBgb2bWAA72ZmYN4GBvZtYADvZmZg3gYG9m1gDjx7oALZtttllss802Y10MM7N1yu233/54RPQNla5ngv0222zD8uXLx7oYZmbrFEmPlEnnbhwzswZwsDcza4BSwV7SRZJ+IOkKSa+StEjSXZIuVjKhzLJufxkzM+tsyGAvaQYwPiL2AjYFjgX6I2I3YBIwGziy5DIzMxsDZVr2jwFfLKSfD1yfny8BZgKzSi4zM7MxMGSwj4gfR8Rtkg4GXgJ+CDyVX14FTAamlFxmZmZjoGyf/UHAKcCBwM+BifmlicDj+VZmWXu+8yQtl7R85cqVdb+DmZkNoUyf/ZbA6cDciHgaWAzMyS/PApZWWLaGiLggIqZHxPS+viHPCTAzs5rKtOyPAqYC10paBqwPTJO0AniCFNQXlFxmZmZjQL3yh+PTp0+P1hm025zx3Y5pHv7MO0ezSGZmPU/S7RExfah0PqnKzKwBHOzNzBrAwd7MrAEc7M3MGsDB3sysARzszcwawMHezKwBHOzNzBrAwd7MrAEc7M3MGsDB3sysARzszcwawMHezKwBHOzNzBrAwd7MrAEc7M3MGsDB3sysARzszcwawMHezKwBSgV7SetLujI/3k/Ssnz7qaSjJO0hqb+wfAdJEyQtknSXpIslqbtfxczMBjJksJe0EXA7MBsgIm6MiBkRMQNYAfwQmASc31oeEQ8ARwL9EbFbfn12t76EmZkNbshgHxHPRsSuQH9xuaSNge0iYgUpmB8q6TZJC3MrfhZwfU6+BJg5skU3M7OyhtNnPxtYnB8/BJwZEXsCU4F9gSnAU/n1VcDk9gwkzZO0XNLylStXDqMoZmY2mOEE+wOBRfnxw8ANhcebA48DE/Oyifn5GiLigoiYHhHT+/r6hlEUMzMbTK1gn7tp9iN1zwCcChwhaRywC3APqdU/J78+C1g6rJKamVltdVv2ewD3RsRz+fl5wDHArcDlEXEvsACYJmkF8ASru3zMzGyUjS+bMCK2Kzy+DTio8PxRUku/mP55YO7wi2hmZsPlk6rMzBrAwd7MrAEc7M3MGsDB3sysARzszcwawMHezKwBHOzNzBrAwd7MrAEc7M3MGsDB3sysARzszcwawMHezKwBHOzNzBrAwd7MrAEc7M3MGsDB3sysARzszcwawMHezKwBSgV7SetLujI/3kNSv6Rl+baDpAmSFkm6S9LFStZa1t2vYmZmAxky2EvaCLgdmJ0XTQLOj4gZ+fYAcCTQHxG75ddnD7DMzMzGwJDBPiKejYhdgf68aBJwqKTbJC3MLfZZwPX59SXAzAGWmZnZGKjTZ/8QcGZE7AlMBfYFpgBP5ddXAZMHWLYGSfMkLZe0fOXKlTWKYmZmZdQJ9g8DNxQebw48DkzMyybm552WrSEiLoiI6RExva+vr0ZRzMysjDrB/lTgCEnjgF2Ae4DFwJz8+ixg6QDLzMxsDNQJ9ucBxwC3ApdHxL3AAmCapBXAE6RA32mZmZmNgfFlE0bEdvn+UWC/tteeB+a2vaXTMjMzGwM+qcrMrAEc7M3MGsDB3sysARzszcwawMHezKwBHOzNzBrAwd7M
rAEc7M3MGsDB3sysARzszcwawMHezKwBHOzNzBrAwd7MrAEc7M3MGsDB3sysARzszcwawMHezKwBHOzNzBqgVLCXtL6kKwvPL5L0A0lXSBovaQ9J/ZKW5dsOkiZIWiTpLkkXS1L3voaZmQ1myGAvaSPgdmB2fj4DGB8RewGbAnOAScD5ETEj3x4AjgT6I2K3/PrsLn0HMzMbwpDBPiKejYhdgf686DHgi23vnwQcKuk2SQtzK34WcH1+fQkwc+SKbWZmVVTus4+IH0fEbZIOBl4CrgMeAs6MiD2BqcC+wBTgqfy2VcDk9rwkzZO0XNLylStX1v0OZmY2hFoDtJIOAk4BDoyI3wIPAzfklx8GNgceBybmZRPz8zVExAURMT0ipvf19dUpipmZlVA52EvaEjgdmBsRT+fFpwJHSBoH7ALcAywm9edD6tJZOvzimplZHXVa9keRumquzTNvjgXOA44BbgUuj4h7gQXANEkrgCdIwd/MzMbA+LIJI2K7fH8OcE6HJPu1pX8emDucwpmZ2cjwSVVmZg3gYG9m1gAO9mZmDeBgb2bWAA72ZmYN4GBvZtYADvZmZg3gYG9m1gAO9mZmDeBgb2bWAA72ZmYN4GBvZtYADvZmZg3gYG9m1gAO9mZmDeBgb2bWAA72ZmYN4GBvZtYADvZmZg1QKthLWl/SlfnxBEmLJN0l6WIlpZZ196uYmdlAhvzDcUkbAbcC2+dFRwL9ETFX0iJgNrBVyWXXjfg3mD9xgOVPjfhHmZmtq4Zs2UfEsxGxK9CfF80Crs+PlwAzKywzM7MxUKfPfgrQajavAiZXWLYGSfMkLZe0fOXKlTWKYmZmZdQJ9o8Drb6Tifl52WVriIgLImJ6REzv6+urURQzMyujTrBfDMzJj2cBSyssMzOzMVAn2C8ApklaATxBCupll5mZ2RgYcjZOS0Rsl++fB+a2vVx2mZmZjQGfVGVm1gAO9mZmDeBgb2bWAA72ZmYN4GBvZtYADvZmZg3gYG9m1gAO9mZmDeBgb2bWAA72ZmYN4GBvZtYADvZmZg3gYG9m1gAO9mZmDeBgb2bWAA72ZmYNUPrPS14J3njRGzsuv/uou0e5JGZmo8stezOzBqgV7CXtJ2lZvv1U0ick9ReW7SBpgqRFku6SdLEkjXThzcysnFrBPiJujIgZETEDWAE8CZzfWhYRDwBHAv0RsRswCZg9YqU2M7NKhtWNI2ljYDvgMeBQSbdJWphb8bOA63PSJcDMYZXUzMxqG26f/WxgMfAQcGZE7AlMBfYFpgBP5XSrgMntb5Y0T9JySctXrlw5zKKYmdlAhhvsDwQWAQ8DN+RlDwObA48DE/Oyifn5GiLigoiYHhHT+/r6hlkUMzMbSO1gn7tq9iN10ZwKHCFpHLALcA+pxT8nJ58FLB1WSc3MrLbhtOz3AO6NiOeA84BjgFuByyPiXmABME3SCuAJUvA3M7MxUPukqoi4DTgoP36U1Movvv48MHc4hTMzs5Hhk6rMzBrAwd7MrAEc7M3MGsDB3sysARzszcwawMHezKwBHOzNzBrAwd7MrAEc7M3MGsDB3sysARr1H7RV3LfjTh2X73T/faNcEjOz4XPL3sysARzszcwawN04I+TLJy5Za9lJX5k1BiUxM1ubW/ZmZg3glv0Y+Pzha1/m/7RLF41BScysKdyyNzNrAAd7M7MGqBXsJe0hqV/SsnzbTdIiSXdJuljJhPZlI114MzMrp27LfhJwfkTMiIgZpD8f74+I3fJrs4EjOywzM7MxUHeAdhJwqKR3AT8FfgNcll9bAswEtgYWti27rn5Rzcysrrot+4eAMyNiT2AqcAjwVH5tFTAZmNJh2RokzZO0XNLylStX1iyKmZkNpW6wfxi4ofD4JWBifj4ReDzf2petISIuiIjpETG9r6+vZlHMzGwodYP9qcARksYBuwCnAXPya7OApcDiDsvMzGwM1A325wHHALcClwNfA6ZJWgE8QQr0CzosMzOz
MVBrgDYiHgX2a1vcflro8x2WmZnZGPBJVWZmDeBgb2bWAA72ZmYN4GBvZtYADvZmZg3gYG9m1gAO9mZmDeB/qupx/Wfc0nH5az6z9yiXxMzWZQ72ryDz588vvXzxkm07pt1/1k9GsERm1ivcjWNm1gAO9mZmDeBgb2bWAA72ZmYN4GBvZtYADvZmZg3gYG9m1gCeZ29D2nLpnR2X/3zmm0a5JGZWl1v2ZmYNUDvYS7pI0g8kXSFpD0n9kpbl2w6SJkhaJOkuSRdL0kgW3MzMyqvVjSNpBjA+IvaSdCMwFTg/Ij5VSHM80B8RcyUtAmYD141Ama3HbXPGd9da9vBn3jkGJTGzlrp99o8BX8yPxwGTgEMlvQv4KfAeYBawMKdZAszEwd7a+IfBbHTUCvYR8WMASQcDLwH3A2dGxHclfR/YF5gCPJXfsgrYYfjFtUabP7HDsqfWXmZma6k9G0fSQcApwIHABkBrysbDwObA40Br75yYn7fnMQ+YB7DVVlvVLYqZmQ2h1gCtpC2B04G5EfE0cCpwhKRxwC7APcBiYE5+yyxgaXs+EXFBREyPiOl9fX11imJmZiXUnY1zFGlQ9lpJy4BfA8cAtwKXR8S9wAJgmqQVwBOk4G9mZmOgbp/9OcA5bYs/1ZbmeWBuzXKZDcsbL3pjx+V3H3X3KJfErDf4pCozswZwsDczawAHezOzBnCwNzNrAAd7M7MGcLA3M2sAX8/eGu++HXfquHyn++9ba9mXT1zSMe1JX5nVcfnnD1979vFply7qmLb/jFvWWvaaz+zdMa1ZVQ72Zuug+fPnl1oGsHjJth2X7z/rJyNYIut17sYxM2sAt+zN7GVV/oKy0+WpwZeo7lUO9mY2Kir9d0GFy1l3ujTGQJfF6Ob4TK9zsDczG6ZOA/Ew8GD8WHCwNzMbRZ1mXUH3Z155gNbMrAEc7M3MGsDdOGZmPazKORWDccvezKwBHOzNzBrAwd7MrAG6FuwlTZC0SNJdki6WpG59lpmZDa6bLfsjgf6I2A2YBMzu4meZmdkguhnsZwHX58dLgJld/CwzMxuEIqI7GUvXAp+LiBskHQ/sEREntKWZB8zLT3cAHuiQ1WbA4yU/dl1L2yvl6IW0vVKOXkjbK+VY19L2SjlGO+3WEdE35Lsjois3YAFwaH58GvCpmvksf6Wm7ZVy9ELaXilHL6TtlXKsa2l7pRy9kLbTrZvdOIuBOfnxLGBpFz/LzMwG0c1gvwCYJmkF8AQp+JuZ2Rjo2uUSIuJ5oPN1P6u54BWctlfK0Qtpe6UcvZC2V8qxrqXtlXL0Qtq1dG2A1szMeofPoDUzawAHezOzBnCwNzNrgJ69nr2kA0hn3U4mnUiwNCLW+gfgsunqpO9G2qrlraKbeXdLt8q8Lq4Ls27qyQFaSReRdtIlwCpgImmu/uMRcXTVdHXSdyNt1fJW0c28a5Sl7A9fV8rcS+vCrFf0arD/YUS8ucPyOyPiTVXT1UnfjbRVy5tfuwI4AOgvLgYiIravm3fZfGukrfIjWXa9lf78KvnWzLsr662KLm47r4seSlsn/WB6NdhfCfyUdCG1p0gBYw7wmog4sGq6Oum7kbZqefN71iedJr3bSKyzqvnWSFsl0JZdb6U/v0q+NfPu1nqrEjC6VQavix5KWyf9oHn1aLDfkHSJ5FnAFFJXwGJgQUT8pmq6Oum7kbZqeQvvmxARzw28xurlXSbfqmkrBtoq67hKWavWi9J51yhL2fVWNQiMeBmqpu1W3uviuuiVdTyoGMaFdcbqBlw/kunqpO9G2qrlHam8gbNHansAGwLHkS6XcQ3wL8AxwAYj8d2GW9aR3HYjud7y8wk18ihdhm6l7cZ6rrMuqpRhpLfdaJRhuGXu2dk4Q5gwwunqpO9G2qrlHam896nxuR3zjnSZjK/l21okXR8RZf/IplOZh1vWgfKtk3bE1htA5NabpLMj4uMl86hShm6lHUjt9RyDtGRHoA7BCG+7USrDsMq8
rs6zL9v3VLWPqkr6bqSt06fWzbxHugxQbSfpVpm7tZ2rGijvkQi0vaBb69l1qIZ1NdjbyBrN/wcebsXvpf8y7oWyVClDt9KOtpEInr3w/aqWYVhlXleDfdkv3c2V2Y20dTbmSOS9osbn1inDSOQ73LIOlG+dtN1ab1XKV6UM3Uo7kF74cRko39Gs8yNVhuGVeSQGQXrhRpqqNaVC+s2B7fPjPwBe1/b6acBH8+NXFZZvCPxFhc85ru35toXHW+b7ccC8EVgHI5Y3aw66jsi6yO+5pUSak4F965R1rG8DlWWo+taW9svdKMMopz2uZLpK2zq/p2MdqhoDhvp+Vfankdr3qtblKul7tmUvaW6+7S1pd0m/J2lCW5ozJe2en54I/L2kb0m6RNJlbWnfJ2kXSTsp/SfuNOB9+eWtgRvairAfsEzSucARki7Oy38LvLOQ7w2SrpE0TdJ1Hb7KYW3Pv5LfNw74BkBEvAQcPJx1UTfvQRTz348S6yJ/9j6SlB9vJ2mrfNs4l2fv/Nqdkq7ItyslXSVpRs7mIeDLdcoq6eacX6fbIkl/Wydt1bLUqG8vi4iT2pdJur5qGbqZtkq9r7qty9ah/HrpGFDl+2VV9qeR2ve6Nqmkl2fjfIQ0de/VpLnak4A3S/pERFya0+wE/I6k44DfANsARwP/ChzRlt+vgP1Js0R2Ab5F2umIiG9L+su29L8krcgfRcQ/S3p3Tvtiroct6wFXkdbl85JuBN6c378p8CyApF2AF4D1JG1POrRbv/B4sH7IQdfFMPMeSPE9ZdcFwCdILSxIZ9H+O6mFsxcwvZDuyYg4SNIk4NmIeE7SbpLOBF4CNqhZ1mdi4BPUBCyX9OmI+HXFtFXLUrW+DaVbg5J105aq91nVbV22DkG1GFDq+1XZn7qw73UtfS8H+xcj4u+KC5SuuXIqcKmkPyKdjv9F4FJSMNwyIh6R9GxEPNKW3w3Aw8C7gB2B3YCdJbWu2fKf+TPGkfrGfgYsA36QX99Q6cy+9g34e8C7gVYr4vL8+iLSP3U9nZf/JfAcaYf/i5zPNoXHtdfFMPMeUI11AfBS5ONL4IGIOCXn9W5JG0aamknhfe8BDpb0EvAq4DbSjvrPNYvd2mEvBDYjdZ88Brw20pm9v18zbVWl6lsFPTGjo6BsvYfq27pUHaoRA8qqsj91Zd/rhp4L9pK2AX4+wMs/IgU4SBXqHcDOpK6EC4EtJJ0FvC7f/11E/BYgIp6WdF9EzJK0ICLeK+mKiDio+AER8VKuROeS+lY/KOk9wHOttJKuKrzlZ8D3SK2JqaRKHKy9c54OHALsHBGn5JbjtoWKfFVb+irronLeJajGumiVu/VDsGt+TH5+AvD2/HyqpD8jtcZaR2q3AG8inZT1oSplzZ8rVk86mBYRc5TmZB/U3g1SJW1FgvL1rUtGY6JB2XoPNbZ1yTpUKQZU+H5V9qeR3ve6NqmkF/vsvwlcDbxJ0gpJd0j6vKQdI+LRiLgfICL+inR4/BCpT/kK4H5Sa+FE4Hbgxba8b5H0WuCT+TD6a5JOavUPtkTEU8DrgD1JlfNUUnxQW5CAdNGvHYEHgPVJg5aw9kYYB2wMPCzph6QumX8nZbwenbstSq2LmnkPqtUvWnZdSPp9STfltEfm7pEHSS2ck/N9scvkb4BnSC3AXwG/Jo0B3AD8B/DVYnkkbVt4vGW+HydpXqEPdz3SEcgaX6XtnhppS5F0XLE/mZL1rWz2A3zmAZKmFJe1laGYdvPcvYCkP5D0urb+79MkfTQ/flVh+YaS/qIt37L1Hkpu66p1qGYM6LReTpa0b+H7VdmfKu97JetyKZXSVxn5Hc0b6bK45JV1KHArcFpbGpF+zfcHPscgI/GkX/+bgVNIrYmbSS2Kb5Iq7S5t6d8C/G1+vH9h+XjgjwvP7yb1y24NXJnzX0IKikuBKwpppwLXAdM7lG/qMNdF5bxJLaO5wN7A7qRD87VOU6+wLjYF3k/a2Q4F/gn4
Ur59pb0cpJ308vz6QaQ/VD4zv/Y1YNNC2uvz/TjgmsLyq9vyPAbYFziH1KVwZb5d1+F7lU6b099A6iqYNkB+1w6jvu0DL1+rajtgq3zbuMPnnAnsnh9fRtoHvgVcAlxWSPc+UjDcCTie1Kfe2o6HAT9py/dKoHUkdzxwcV6+Xof1XLreV9nWNepQqRgA3En6Mbgil/cqYEZ+7e3APXX3pyppK9blmwt1sv22qLUty956rhunYAOASBeuWijpauAySRtExKclfR/4BWln3Q/4b+DC3GgSsGFEzCnkdzLwGuCtpAGVHYG/Ix1O/jmpZXICgKRzSK2P90p6gdTPeHnOJ0gVveV50gW/WuaQ+n7/jLQxd8h5Xprf+yvg1LbG3bj8fQ+puS7q5j3kIHiVdRERqyQdRjoaWR4RC3MeHwCWRcSjbZ9/X0QcLGlf0k79DCkIEBHH5fdWGSw7BvhT0gDfGZH3mCJJ60UaWC6dtrCoyqBk6fqWdWNQsluTEqBkvS8Yclvnx6XrUMUYUHqQuMr+VDFt1cHcEZ1A0MvBfmHxSUT8WtJ7gTPy87coXR1vCfB5UutpGfCFiHihPbOIOFGpL/abpA3zKlLL51NAf0R8tpB8KemQ8yDS1RJ/Taq8F5J2+H+SdE2k63dcSzokfYbUKl7jmh1aPS3tQ6Qd5GDSRv0f4K5WMlYfBldeF8PIe6iB39LrgrSj70laF9cDkyXtRarcAfybpMMi4of5c9YrlOsoUosS0o/YZyPiu/l56QGwiLgwlwulaYHtV7hs7XxzqqQtLCs9KFmxvkF3BiW7NSkBytf70tta0lZUqEMVY0CVQeIq+1OVtFUHc0d2AkGVw4DRvAHbkw5jJ5IPbwdItwuwB2nHPAgYP0ja3wW2zSt7r5z/1nmDvb5D+iNZ3eo9FngbKQjsOkD+G7Y9PxH4cNuyfUk7/Xu6sC5K5Z0r2ARy91Dba1OBHauuC2ALUuv0e6QW7FtI3SRH5dvJwH6F/MYBby4834TU9bEFKRi3lv9O/rwl+bmAqwqvX9WhrOOAcR2Wjy+WoUbaW4FPA2cBy4G/ZvCui9L1jRTcWl0Mj7Fml8PVhXSfBv6XdIQwmdTn/aNcpv/J9+ML6Zfl+wX5/or275qXTyT9MO0D/FteL1cOtp7L1vsK27pSHaoSA4D7SEcdRxVurye1ur9Mh6uyUmFfLZO2Sl3Or12TH1/XqiPF+6q3ym8YrRvpMO7rpNbTkly5/ws4vC3dpLwhty+Zbz+pRdef83/5cSHN8XmjvIG0Ez+cV/53Sb/gHyukvTPncSFr9/sdwdr9cGuVkzTA9fbhroucdrcS6+Dm/L2eJLXo7iC1jDoF+dLrovCefYB3VtjWB5C6g47q8NpkUsvn68APSd0l8/Jr6wE3dHjP0nzbnkK/JimA3zKMtD/O2+BwUuvtb/L2OI0Owb5MfSO1zm4i9Wm3+q1vYfUPw9a0Ba+8Di4kBarjSEd+byN1Gb2TQoOA9OPw2vz9/pLUyj+JDo2G/Lkfzt/vw6QjFlEIPHXrfZltXbcOUSIGAH9CGgM4JN//cV7Hm5KO3i7s8J5K+ypD7HtV6nKufx/Pj6/rdF/1VvkN3b6RDvHeQDpE3RJYv/Da7sBX2zbyEmAeqYVzSb59C/iPAfJf41ey/XF+fhfpcPsdpMPl5Xn5ivw5GxTS/kfh8dIOn7eU1OrYvu3zP0Jqhf0NsCvw2HDWRV72BuCu/HhKvt8E+JcB1sXSfD/YwG/pdZGXn0W6hv2FuVIXb2tdWoHUGvp3YItB6kTVAbBWy+nbpBbXbQNto4ppKw1KVqhv3RqU7MqkhLL1vs62rlqHqBADKDFITP19tdS+V6UuU3ECwVC3XuyzP5xUmXcmVeQtJW1Kat1+PiJag6hHA+8lbYg7SCuh2O+6Rj+1pIX59V0kXULqu7yk8FlFT5CC3DOkFsxdkuaRdpD7SBXjfZLG0zbuIenrhaevJgWIfUj9oQ+S+vcg9fF+CDgP
+Ac6z6cvtS4KzgAez4M3CyXNIg0abtUhbxhi4LfKuijkuTdpsLDlraQuCgFLJH01Il6Q9Km8LjYitXJOKAxutQbXPlZ38DkPem0QETe19cfHMNKWHpSsUt+ie4OSXZmUUKHet14rta0L7y9bh46mQgyg3ISAuvvqkPtexcHcOhMIBtVzwT5WD0x9I/IfVOfK9X7gGkmHR0Q/qX9vCunv7pZL+iypi+Fu4LsR8Yu2rE8hbbzvkHaChaRDqnF5WdHGpP7WX5G6MV5D6uf+DKlCfSunezGXaWPSjImzSYNLLeNJg2kTgNMl/S7pZI9TSYfmt0t6OiJ+KWmtP2uosC6QdCQpML8mIkJSRLo2B5IGOqlkqIHfKuuikE38LH/usaTKe0mk2Q9Hx+qBs/8kBaD1STto+wDV+vm+9ABYHqw7Cdg8Ih6U9P5WXrksqpO2oPSgJCXrWzcHJaN7kxLK1vuWstu6UPRSdah0DKgwIeA2Ku6rFfa90nU56k0gGFTPBXsASScAZ0k6jxR4PsTqfsRzgYMj4hxJ/wT8o6SDSYd9k0iHoudIOisiWhfsolB5NiXNSng16Qw+AZtKmhURrVkKN5FabDeTNsZGeSPeSmqVHUCaUx2S9s7p/4RUqd8PXEQ6tH8Q+H+kAbRNSSd5vCffv6vta/+q7rqQ9A+knes04AalsxW3yPeDncCzKAebp4BVkTwJfLSQptS6yGU9hXS25DtI3U5vBd4dq89ifvkkpohYlN/zOGn8YQlwfkSssR4i4v9yuh+TWkmTO/yQt7xIChytKZCDTUmrkrZVlr9uPZY0t/iapBNJh+ettGXr2/Ok9bgpqatsEekH9qVcti+RBk+L5Xght/o3InX7vI2BTwQ7Kqf7LfDx/J0/CRwi6fUR8d85z2tyeb9A6qK7J+e9af5eh+VAT4V63ypvqW2d01SpQ1ViQJDrdUQcK2kTUqPlMNL2aflvKuyrVfa9inW5lf844B2tH4/C8vHAjM7v6qwXz6AF+JOI+F9WH+4uIw0CXUOqvK2z0J6MiD8j/RrPioh/jYgPklpKb2rPVNJ2pNboXqQZB3uRLgPwbxRWXER8hLRjbEGaGXBzfukaUt/djoVsdyYF4deRWjmLI+KfSTMqVkTEdaQHvjGIAAAJt0lEQVRf/U0j4ibgiXy/qdLp3NtK2pXVh4yV1oWkVwN/SDpBpnVI17r/LWu2uNp9EZhPukrfYkm3SPovSYdXXRe5Ura6OHYGZgJ9rN1FRk6/cz7kXRERbyftpHfnVlInj0bExRFxWSGP9SW1Tp0nIl6KiM8Bv8rLz80vPR8RX4+Ir9VJW/i8OyVdqDQV7va2l39J6kMvph+yvkXEYxHxyYj4I9I6nRQRF0bERfl2bkTc2GF9/IzUYt8mIq6IAS4JkAPMTaTtfFm+n08KTi//9aGk43Mr+g7SLKC7SC3Lk0nBbo0fN8rV+1bepbZ1jTpUOgbk7d2a9nsAaXrp9Lz+i0dkpffVuvteznNFsS4PYjFp39xea1+FtdJVWXuyZd/mcuCnwAuS/iAibs3LjyK1Tq4jzV1dX1JxzvgaLZ28kc6LiH0kfYQU6G4mtUovioifFNJ+kHTY+eekFsqhkh4ibbzjgRsLWY8jrccgHe5uKWla8bMj4tlC2TbK98eRfvmvJs2hvbfuulC6XOyXlE+9jogFko6PiEvy68cVM1Gaq/0kqUXyMeAXrUNjpcvFnkCeZ192XeSWx0WS3puDaCvYXSbpgx0C1sGkuc2bSdqR1HrdDZgvqS8iviBpD+CpiHiQ1F86J2+7CXmdX0b6odqiLe+IiKslHS7pnaQjk5e//jDSPhIRx+TvtrQtk3/NR2Hk10vXt5z+LNKMmRckHdr2uXdExHmFtJNI3UJfAD4n6ZlCeTeOiPaW6L0RcYzStX+OzXm8/Dg7mXRU8XNSy/7VuQX/WlL/+R+35TlkvS8YclvndVi1DlWKATm/95GOWA6IiMfaX6+yr0a6/lGlfS8v
ewPph3o3SVMi4hf5SOOrEdHe2IlI11f6NvBlSbdFxJ4R8VsN3D3bUa8G+9aFqDYmDUK9ltTymkWaMUJEnCXp74EP5NuXSC0YSBVxk7Y8dwDm5ZX65oh4XtJ+pEGQRZL+j1QBWoNRL5H6Zu8mtXCOIrWGxpF+UVsXtHqQ1Af6NVKf51RSYHo9qXLfnzf8tZJOAt6dg+1bSK0AkfokW33GddbFi8BJkr5FChYinaU30ABOlYHfKusCQEqDXC3n5PWwTy7rzfn+bODsvA3OJc2CWUUauFqodI2UuoNlkvRN0pHH6RHROvqYwOoduFJaVRyUpFp9gy4MSqoLkxIK6cvW+1LbOiLuWHPVlqpDpWOAqg0Sl95Xa+x7UHEihSpMNhhU1JjCM1o3ClOR6DAvuPDaZsBBJfNcD9i6w7Lfb1sm8nQz0o78kcJrfYXHbyj5ud8htSjGkQ6fjybNqNgX+CvSvPVNhrMuSPN4V5D6ZTfJy8Yz8LVevlF4PJ60w3yPNNBUeV3E6uliZ5FOOCrePg58epDvN7Pte6xHCoBXkPqB78n3389pWtPibu2QV2te+bgO36P9u5VKm5+fTPrR3YfUZbNv4bY/sMcw6tt1hcfHklrZE/LzGYXXPkoK8u/Ozy8m/eAe1l5/SNfw2Yw0+DqFdBQ2ubWsLe2twAdJ4wbfAx7J3/kJ0gl167WlL1Xvy27r4dYhhogBpEbCW/P9nPy4eJs7nH2VkvteXpf/yOopv0sLry0pPF4/b497ivtZ3k7Hko42lgz0fTuug7obzLf6N1LXyR6wxskvM0co7+3Jf4s2RLoTSC2J80izNM4kBdS3AZeP9TrKZZyWA9S+wPfz/U35tVawXzyK5fk2aZ71+aTplreQurLuJc0fn1Mz31NIRwXvyDvypQx+JvhkUjfARTlQHZEDyP8A7+uQ/nbSkWDrfv/W40Kaz5KmZe5O6q67Iy+/mnRi1lvHuj6M0DZ8PH+n0yn8xeYg6Uvvq0Pte6Qj0fmkH/uluQ7dy+oze4vBflwu43/l560fkNrBvlcHaF9xJN2Y+x8hDTwdSGolAxARSzu+sXz+35D0A1L3wRWF5a9X+lu8dkMOgveA4Qxsd0PpQcmy1MVByZy+G5MS1jmqMCGg6r5aZt9TxcHcqDGBYCi92mf/SjQB+HDeuV9Fmr1xfKHfcEVEDPi/pCVsHRF7AUg6Q+nEqN+QrlPy9OBvHXAQfEzFyA1sj5Qqg5KlRBcHJasMEqvapIR1UalB4qzqvjrkvhc1B3OpNoFgUG7Zj54nSYfdT5EqwHLSoXTr9olh5l8crNkI+J7SnOW5pMPHdsWB34NIJ0ltTjrM7yXtg2WtvvJ9SH3VXx/szSOsNSjZx+rWd2tQ8mBJfzqMvKX0R9v7kE5ie3lQsjhYGRFnkQYMHyMN9vaTZuYsJP1oLyrkudYgMWmQ/xHSIPFirY5gd5N+ONsH4u8mDdDW+fP1nhERZ0fEH5IGV88FfhkRqyLiVGCGVv9pOVTfV0vtexHxYqQ/k9+dtsHcQYremkAwnzTffrDJBoNq/VmCdZHSGY/fJPXBPUqaOzsrCidKSNouIh6qkfcmpBbE8RGxU162BJhNOsnkucjTBQd4/9TIp+NL6dS/qmXoJknfIV2C9nJS/2k/qW8aUtCbQTrh55nOOYxoWd4QEV05klA6Pf61rP3PSiL11/5Vh/dsBrwlIq5of60t3XqkweZH2pa9KSJuLywT6aqN38ndDh+IfJZtbv2urPn1eo6kma3uGEmTSVN8X6yyr9bd9/Ln3Ui6CN4GEfGM0myvq2LN/+BAaerrRsBDbWUQ6e80+0t/5x7bt1+xlObWnkua6vhr0lSrQ0iXZ4V0EaWPD/D2wfLdhHTa/9Gk2SInkE602TNPB9sSOCI6zCle10j6GOmIZHnrR6m405qNhLL76nD2vTydclVEDPQf0yPOwX4USHozqVX6AdKMjh+RpxZGxBuVTnCZPVgeJT7jtlzJ3kbq2lhC
Oq37ZlKrrv3PMtYJSv8GdXxuSV1POjX/7yPN0zYbUXX21Sr7nqRvkAa7P0W64uaeefnrSUcQxT9RGVEeoB0dE0jznFsXX3qWtO5bh+wj9osbEddIepB0caqNSH90cRVpat26qNsD22ZFtffVkvvecCZSDIuD/SiIiP+UNJt0hcg7SYM86wHjJC3K91dFxDuG8TEvSVpBOmnjdyLi/tYLku6TNC7aLqa0jmgNlh3C6sGy4iyEs0l/v2c2bDX31Sr7XqfB3ItJg7l7dO+buRtnVOXBn9nk0+DzL3o3PmfHYoVbV3VzYNtsMHX31YH2veFOpBgJDvbW07o1sG02mnphIoWDvfWs0RjYNhtNYzmRwn321stGbWDbbDSNxUQKB3vrWaM0sG02msZsIoW7caznjdbAttloGu2JFA72ZmYN4AuhmZk1gIO9mVkDONibmTWAg72ZWQM42JuZNcD/B5+ubT16vJtvAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Work on an explicit copy: adding a column to a slice of train_cut_df raises\n",
    "# SettingWithCopyWarning and is not guaranteed to modify the intended frame.\n",
    "train_multi_df = train_cut_df[['content_id', 'word', 'char']].copy()\n",
    "# Joint label '<subject>_<sentiment_value>', built with vectorised string\n",
    "# concatenation instead of a row-wise apply.\n",
    "train_multi_df['class'] = (\n",
    "    train_cut_df['subject'].astype(str) + '_' + train_cut_df['sentiment_value'].astype(str)\n",
    ")\n",
    "# Bar chart of how many rows fall into each joint label.\n",
    "train_multi_df['class'].value_counts().plot(kind='bar')\n",
    "train_multi_df[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>content_id</th>\n",
       "      <th>word</th>\n",
       "      <th>char</th>\n",
       "      <th>价格_-1</th>\n",
       "      <th>价格_0</th>\n",
       "      <th>价格_1</th>\n",
       "      <th>内饰_-1</th>\n",
       "      <th>内饰_0</th>\n",
       "      <th>内饰_1</th>\n",
       "      <th>动力_-1</th>\n",
       "      <th>...</th>\n",
       "      <th>油耗_1</th>\n",
       "      <th>空间_-1</th>\n",
       "      <th>空间_0</th>\n",
       "      <th>空间_1</th>\n",
       "      <th>舒适性_-1</th>\n",
       "      <th>舒适性_0</th>\n",
       "      <th>舒适性_1</th>\n",
       "      <th>配置_-1</th>\n",
       "      <th>配置_0</th>\n",
       "      <th>配置_1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>01DqbWMwRjfBxLYz</td>\n",
       "      <td>自动 启停 对车 没 好处 ， 等 红灯 挂 N 挡 ， 轻 刹车 。</td>\n",
       "      <td>自 动 启 停 对 车 没 好 处 ， 等 红 灯 挂 N 挡 ， 轻 刹 车 。</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>01X4vSqHci6NPYBy</td>\n",
       "      <td>开 的 2.0 ？ 在 西藏 动力 如何 ？</td>\n",
       "      <td>开 的 2 . 0 ？ 在 西 藏 动 力 如 何 ？</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>02VHu4amZkpAGTb7</td>\n",
       "      <td>常用 配件 价格 不 贵 ， 跟 大众 差不多 。 而且 常用 配件 都 有 备货 ， 如果...</td>\n",
       "      <td>常 用 配 件 价 格 不 贵 ， 跟 大 众 差 不 多 。 而 且 常 用 配 件 都 ...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 33 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         content_id                                               word  \\\n",
       "0  01DqbWMwRjfBxLYz                自动 启停 对车 没 好处 ， 等 红灯 挂 N 挡 ， 轻 刹车 。   \n",
       "1  01X4vSqHci6NPYBy                             开 的 2.0 ？ 在 西藏 动力 如何 ？   \n",
       "2  02VHu4amZkpAGTb7  常用 配件 价格 不 贵 ， 跟 大众 差不多 。 而且 常用 配件 都 有 备货 ， 如果...   \n",
       "\n",
       "                                                char  价格_-1  价格_0  价格_1  \\\n",
       "0          自 动 启 停 对 车 没 好 处 ， 等 红 灯 挂 N 挡 ， 轻 刹 车 。      0     0     0   \n",
       "1                        开 的 2 . 0 ？ 在 西 藏 动 力 如 何 ？      0     0     0   \n",
       "2  常 用 配 件 价 格 不 贵 ， 跟 大 众 差 不 多 。 而 且 常 用 配 件 都 ...      0     1     0   \n",
       "\n",
       "   内饰_-1  内饰_0  内饰_1  动力_-1  ...   油耗_1  空间_-1  空间_0  空间_1  舒适性_-1  舒适性_0  \\\n",
       "0      0     0     0      0  ...      0      0     0     0       0      0   \n",
       "1      0     0     0      0  ...      0      0     0     0       0      0   \n",
       "2      0     0     0      0  ...      0      0     0     0       0      0   \n",
       "\n",
       "   舒适性_1  配置_-1  配置_0  配置_1  \n",
       "0      0      0     0     0  \n",
       "1      0      0     0     0  \n",
       "2      0      0     0     0  \n",
       "\n",
       "[3 rows x 33 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-hot encode the joint label once; the guard keeps a re-run of this cell\n",
    "# from appending the indicator columns a second time.\n",
    "if '价格_0' not in train_multi_df.columns:\n",
    "    train_multi_df = pd.concat((train_multi_df, pd.get_dummies(train_multi_df['class'])), axis=1)\n",
    "\n",
    "# Drop the raw string label before aggregating. Older pandas silently discarded\n",
    "# non-numeric columns in groupby().sum(); newer releases concatenate the strings\n",
    "# and keep the column, which would leave 'class' ahead of the indicator columns.\n",
    "# errors='ignore' keeps the cell re-runnable once the column is already gone.\n",
    "train_multi_df = train_multi_df.drop(columns=['class'], errors='ignore')\n",
    "# Collapse to one row per comment; every indicator column becomes a per-comment count.\n",
    "train_multi_df = train_multi_df.groupby(['content_id', 'word', 'char']).sum().reset_index(drop=False)\n",
    "train_multi_df[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['价格_-1',\n",
       " '价格_0',\n",
       " '价格_1',\n",
       " '内饰_-1',\n",
       " '内饰_0',\n",
       " '内饰_1',\n",
       " '动力_-1',\n",
       " '动力_0',\n",
       " '动力_1',\n",
       " '外观_-1',\n",
       " '外观_0',\n",
       " '外观_1',\n",
       " '安全性_-1',\n",
       " '安全性_0',\n",
       " '安全性_1',\n",
       " '操控_-1',\n",
       " '操控_0',\n",
       " '操控_1',\n",
       " '油耗_-1',\n",
       " '油耗_0',\n",
       " '油耗_1',\n",
       " '空间_-1',\n",
       " '空间_0',\n",
       " '空间_1',\n",
       " '舒适性_-1',\n",
       " '舒适性_0',\n",
       " '舒适性_1',\n",
       " '配置_-1',\n",
       " '配置_0',\n",
       " '配置_1']"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persist the multi-label training table (the index is written as the first CSV column).\n",
    "train_multi_df.to_csv('../../data/csvs/multi_train.csv')\n",
    "# Every column after content_id / word / char is a label; their order defines\n",
    "# the index -> label-name mapping.\n",
    "multi_label_itos = list(train_multi_df.iloc[:, 3:].columns)\n",
    "# Use a context manager so the pickle file is flushed and closed deterministically.\n",
    "with open('../../data/multi_label_itos.pkl', 'wb') as f:\n",
    "    pickle.dump(multi_label_itos, f)\n",
    "multi_label_itos"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 1.6 多标签分类，先分主题再分情感"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7fac91dfd710>"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXsAAAEQCAYAAABRDdSSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGiBJREFUeJzt3XmUnVWd7vHvk4GpkXSAKIhobLiIDRrQoLRGkEhwAmSw1W4ZxAG17aYVWxtdF5u+jjgub2Pb4oBI4wjSDZFBIFHBAVYQCbRTqxc0XHUlRAkigsBz/9i7bk7KSqqs1HvOqezns1ZWndrnVO1fnVQ9Z7/73fs9sk1ERGzZZgy6gIiI6F7CPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaMCsQRcwYuedd/b8+fMHXUZExLRyww03rLE9b7zHDU3Yz58/nxUrVgy6jIiIaUXSbRN5XKZxIiIakLCPiGhAwj4iogEJ+4iIBiTsIyIakLCPiGhAwj4iogEJ+4iIBiTsIyIaMDQ7aCdi/mlf2qyvv/Vdz52iSiIippeM7CMiGpCwj4hoQMI+IqIBCfuIiAYk7CMiGpCwj4hoQMI+IqIBCfuIiAYk7CMiGpCwj4hoQMI+IqIBCfuIiAYk7CMiGjChsJd0rqRvSbpY0gGSVkm6tv57jKRtJC2VdJOk81T8QVvXP0xERIxt3LCXtAiYZftAYAdgV+DDthfVfz8AjgNW2V4AzAWWbKQtIiIGYCIj+18CH+x5/FzgWEnXS7qwjtgXA1fWxywDDtlIW0REDMC4YW/7v21fL+lo4EHg+8Dptp9EGeUfDOwE3Fm/ZB2w40baNiDpZEkrJK1YvXr1Zv8wERExtonO2R8JnAIcAfwIuKredSvwUGANMKe2zamfj9W2Adtn215oe+G8efMm+SNERMR4JjJnvwvwBuBw23cBpwIvkjQD2Be4BbgaOKx+yWJg+UbaIiJiACYysj+RMl1zhaRrgd8CJwHXARfZ/i5wPrCbpJXAWkrQj9UWEREDMO4bjts+EzhzVPPbRz3mXuDwUY8Zqy0iIgYgm6oiIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaMCEwl7SuZK+JeliSdtLWirpJknnqdhmIm1d/zARETG2ccNe0iJglu0DgR2AlwKrbC8A5gJLgOMm2BYREQMwkZH9L4EP9jz+DODK+vky4BBg8QTbIiJiAMYNe9v/bft6SUcDDwI3AnfWu9cBOwI7TbBtA5JOlrRC0orVq1dv1g8SEREbN9E5+yOBU4AjgF8Ac+pdc4A19d9E2jZg+2zbC20vnDdv3mR/hoiIGMdE5ux3Ad4AHG77LuBq4LB692Jg+R/RFhERAzCRkf2JwK7AFZKuBWYDu0laCaylhPr5E2yLiIgBmDXeA2yfCZw5qvkjoz6/Fzh8Am0RETEA2VQVEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMmFPaSZku6pN4+QNIqSdfWf4+RtI2kpZJuknSeij9o6/ZHiYiIjRk37CVtC9wALKlNc4EP215U//0AOA5YZXtBvX/JRtoiImIAxg172/fYfjywqjbNBY6VdL2kC+uI
fTFwZb1/GXDIRtoiImIAJjNn/yPgdNtPAnYFDgZ2Au6s968DdtxI2wYknSxphaQVq1evnkQpERExEZMJ+1uBq3puPxRYA8ypbXPq52O1bcD22bYX2l44b968SZQSERETMZmwPxV4kaQZwL7ALcDVwGH1/sXA8o20RUTEAEwm7M8CTgKuAy6y/V3gfGA3SSuBtZSgH6stIiIGYNZEH2h7z/rx58DTR913L3D4qC8Zqy0iIgYgm6oiIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGJOwjIhqQsI+IaEDCPiKiAQn7iIgGTPhCaFGdMWf8x4z7Pe4c/zEREVMoI/uIiAYk7CMiGpCwj4hoQMI+IqIBCfuIiAYk7CMiGpCwj4hoQMI+IqIBCfuIiAYk7CMiGpCwj4hoQMI+IqIBCfuIiAYk7CMiGpBLHE9Tjzv3cZv19TefePMUVRIR00FG9hERDUjYR0Q0YELTOJJmA1+0fYSkbYALgN2BlcAJwNYTabPtKf8JYmC+t/djN/t7PPb735uCSiJiPOOO7CVtC9wALKlNxwGrbC8A5tb2ibZFRMQAjBv2tu+x/XhgVW1aDFxZby8DDvkj2iIiYgAmM2e/EzDyjtnrgB3/iLYNSDpZ0gpJK1avXj2JUiIiYiImE/ZrgDn19pz6+UTbNmD7bNsLbS+cN2/eJEqJiIiJmEzYXw0cVm8vBpb/EW0RETEAkwn784HdJK0E1lJCfaJtERExABPeQWt7z/rxXuDwUXdPtC0iIgYgl0uIae1Dr1q22d/jNf+2eAoqiRhu2UEbEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGArMaJmALve+HmrzJ+/eeWTkElEWPLyD4iogEJ+4iIBiTsIyIakLCPiGhAwj4iogEJ+4iIBiTsIyIakLCPiGhAwj4iogEJ+4iIBiTsIyIakLCPiGhAwj4iogEJ+4iIBiTsIyIakLCPiGhAwj4iogEJ+4iIBiTsIyIakLCPiGhA3nA8Ygux6rRrNvt7POJdT5uCSmIYZWQfEdGAhH1ERAMmNY0j6QDgIuDW2vQa4O3A7sBK4ARga+CC3jbb3sx6I2LInXHGGUPxPWJDkx3ZzwU+bHuR7UXAAcAq2wvqfUuA48Zoi4iIAZjsCdq5wLGSngf8DLiPMooHWAYcAjwKuHBU25cnX2pEREzWZEf2PwJOt/0kYFfgGODOet86YEdgpzHaNiDpZEkrJK1YvXr1JEuJiIjxTHZkfytwS8/t/YE59fM5wBpg+zHaNmD7bOBsgIULF2Y+PyKmxNXL9tjs7/GMxT+egkqGx2RH9qcCL5I0A9gXeD1wWL1vMbAcuHqMtoiIGIDJhv1ZwEnAdZRVOR8HdpO0ElhLCfrzx2iLiIgBmNQ0ju2fA08f1Xz4qM/vHaMtIqIZuyz/zmZ/j18cst8UVJJNVRERTUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0IGEfEdGAhH1ERAMS9hERDUjYR0Q0oLOwl7SNpKWSbpJ0niR11VdERGxalyP744BVthcAc4ElHfYVERGb0GXYLwaurLeXAYd02FdERGyCbHfzjaUrgPfYvkrSy4EDbL9y1GNOBk6unz4G+MFmdrszsGYzv8fmGoYaYDjqGIYaYDjqGIYaYDjqGIYaYDjqmIoaHmV73ngPmrWZnWzKGmBOvT2HMX4g22cDZ09Vh5JW2F44Vd9vutYwLHUMQw3DUscw1DAsdQxDDcNSRz9r6HIa52rgsHp7MbC8w74iImITugz784HdJK0E1lLCPyIiBqCzaRzb9wKHd/X9N2LKpoQ2wzDUAMNRxzDUAMNRxzDUAMNRxzDU
AMNRR99q6OwEbUREDI/soI2IaEDCPiKiAQn7iIgGJOynkKRDJe00BHU8VNJe9faTJT26T/2+XtI/1tvb97RvLelv+1FDT5979NzepX6cUTfyDZykl/Wxr6F+LgZN0t9JOnjQdXRt2p+glTSy4udO4G7gLuBntn/Xp/5PB75k+9uSLqj9bwMY2Mr28/tQw/HAjcADwFOBG4BjbJ8u6QXAO23vsanvMUV1XAK8C3gRcBNwsO3jJc0Eltp+dtc19NRype0lkmYAl9p+Vm2/rB91SLoKuB94GXCO7cNG3X+F7Wd2XUfta6DPRe3rIOAa25a0J3BfvWuN7d/2of/vAD8d+RSYCbzD9rWSnk3Z7b9vH+r4GiWrxrwbuNH26V303eUO2n55I3A58BDKTt25wP6S/sn25/rQ/2OBP60jtfuA+cBLgM9SQq8ffgM8A/g4sC/wGeBRALY/L+nv+1THrykvdP9l+2OSjqo1PNCvi55K2hf4PTCzHt0ImN1zu1+jm5nApZS/sXslfQXYn/Ic7QDc03UBQ/RcAPwTcGi9vQz4D8rMwoFAP3aQ/sr2kZLmAvfY/p2kBXWw9iCwVR9qALjb9hFj3VGvDLxC0ju7eAHcEsL+Advv6G2QdChwKtBp2Et6KrAO+GDt63JgF9u3SbrH9m1d9t/jKuBW4HnA3sACYB9Jy+r93+yy8zpiXAncDlwLfKvetbWki+lvsPw98DvKi93f1r7n99zul/8BHAVcUD+/iPIcLKXsP7mrDzUMy3MB8KDXTyP8wPYpAJKOkrR13ZfTpZG+nw8cLelBYHvgesrA7GMd979BHZLOoVwX56HAL4Hdbe8PPLGrjqdt2EuaD/xiI3f/FyXsu3Y48BxgH+C5wDnAwyS9BXh0/fgO2/d3WYTtuyR9z/ZiSefbfrGki20f2WW/Pf0/WF/4/gV4MvBaSc8HfjdSg6RL+1EL8AbgGGAf26fU0dIePeHSrzpuB75OOcrblRIspr+j6WF5Lqj9jbzwP77epn7+SqDr6aRdJZ1AOfoeGQReA+xHmWp7Xcf9j4zcR86T7mb7sDrFdqSkKzf1tVNh2oY98CnKHPV+9ZIM91Ouv/NR298Hft51AbbfJOmdlJH904GLKeF/PXAd5fl9oOs6qmsk7Q78c522+bik1wD/2jOi6oztO+uJ4CdR/phOpfx+j4wg+7UYYAawHXCrpBuBgylTBtRzB/06XN+RcpT1WWA2sHVt7+eIeuDPhaQnAu8HdgOeYHudpGvY8Oji9q7rAP4XJehd+51ByYyrKNOgHwFO6riGmZQj314e9bEz0zbsbR8EIGm57UMkbQUcAZwr6fO239enUkYOx+8E/gI42fYdfeobAEn7UE7M/pxykvpYyrTB/sDVkmbZvqUPpfwj8Gzb75X0DNvvrfXNopxP6JzttZIuBI4EXmF7HXVLej13cHw/6qBMn6ylTGn9lPVXfe3byH4YngvbN0g6AnghcJWkM4HvA/9QH7IV8M90PDiz/RlJPwJuBn5GCfn/SVnM8VZJL5S0Q32Ouqrhfkm315U/N0paSjmfc0lXffbaElbjfN32U3s+344yT3qN7Xd23Pc3gDsoI6Y7gJ9QwhbK6GHr0aswOqrj3yhXGP0e5YTcUygnrK+h/GFtPfq9BDqo4Uzgt8CJwCeBoykvOFAC7mbb/9FlDbWOz9X+tqIEbq8ZlBVSx/ShjhXA1yhHfWexfr78/lrHzrYf3nENQ/Fc1FquBL4BfGLkXJakVwPX2r65TzVcYvuIGrYfpfytvsL2ij71fxLw15ST1d8c64hb0kzbncwGTNuRfY8Lez+x/VtJLwZO67pj20+RNJuyuuB9lLm/a4EP2P591/331PGq+sf0Kcoh6fbA8cDbKW8N+e4+lLGccpRzJOUKp7+lvCHNOZTD149KurwPS2JfB9xLebEx8H8oy0ChvgB33P+IKyjBfjewje0N3pZT0pf7UMPAnwtJj6RM7d1Peee6HSUdSBmUGPii
pBfYvrHjOmay/uc9kXIkDHCBpHfb/lKX/QPYPofy94CkyyXdN+ohMygvzJ0MELeEkf1elFHLncC6fsxPj1HDvsC2lD+kZ1HWMnd6UnaMGh5ea7ifckLw/1L+oI8B/tP2T/pUx3HAf1J+cY+tdXwZ2Nf2yn7UUOs4GHgkZZndBeM9vuNaNlhtIulVwPYj01x96H9gz4WkhwGvogTYVymrkR5DWe4IZRnqzba/0nEdM4AFIy8qkv6EcqS1prb348W3txZsPziqfRawqKvnYksI+8so831zKevsZ1PWer+3T+vsqWt3vwE8z/YP+9HnRupYRRk9LaEErEZu235px32/nPIH/C3gQ8Cj67+llGVtjx29RLbjevYa/X9Rj8IOtX1ZH/r/DmWjG5S35Ny3574XASf2eZPZAts3jf/ITms4CHhIP0bR49RxKPABSkacO4D+R97I6ZXA8SObqGrYL7f9tC76nbbTOHWp368o69zfDNwxMnUi6QmUJ7LzsK9BfyHll+c9knrn7Lez/byua+jxXdsn1eVcL631Xdl10Fd/Rwn2X1DW3D+k7pbcnbL88C+7LkDSAcCdNeTPAg6T9EbKi/8syrmcTwIP67oW4DbbJ9W6NniXNtufldTpOZRekv6cMsW3QNJOtu+oI9uP2D6uTzW8BdgL+L2kY0fd/W3bZ/WpjuMpR5yH2v5lP/ocg+sy6c8DH5J0ve0n1RO4nc0ITNuwp5zdF2WN+3uAXSTtAFwGvK/rE5IAkl4CvJiyi/fbwCWs3wYOfZofrisu7gP2lfRpyoaqT7P++emHtZRprLuB7wA3qVx7ZW/KieOPU84jdOkgyjK+H1LmqqHshXgdJfzfz8b3ZkyZOkKbNartEz2fPoSyKqRfTgPW1GWwF0paTNnB+8g+1vA0yov+iGdSjkIFLJP0kS7Pc0l6O+V3YlvKEdcr168K/v+LKd7cVf9j1LMX5QT5V0fN3Xc21TJtw75nY8gnbb+k3p4FvAK4XNILba/quIyHATsBj7C9QtK7KTtZb6ZcL6dfSzBPofwif4Eywr6Qso55Rm3rh+2Ah1NOEL8ceARlTvRdlBfBz/ShhuuBN9TzF4+WdCowqy7/u8v2ryX145pJD1B+B7ejXArgbZRzKSNmUY5IO1fPoayl/I5akkfmirscRY7Btm+v/b6Uci7p0y6XLXhJHxY0fJNyUng2ZWA0er/D7I77H5lGfA3wUNs/lPSKkb7rc9LpHoxpfdXLeij8FklnqVxl8U2UJ/OtlN2cnbJ9JuV6H8dIOhf4d8ru3acAt/RrTbft222voZzsWkAZOe5Xb+9QR3Jd+ypl6/cvKCP8O+rJ8usoRxeHbuJrp8pPKM/BDZQwvYHyB97rN10XUX/upwF7An9F2VD0NsolFD5MeUGe23Udkt5fa3h9+VQnUHZ4nyDpxK7776njFMoO1ufUUHsmcNTIyizbozcaTTnbS21fQZnGex3weODrtq+o/5Z2XQNlEDCb9ddF6vwCcL2mddgDf2X7p6yfqrgWuMX25ZTlVZ1SuXTsr2yfQBlVLrb9WduvpSw326/rGnpq2ZMygj4Q+GL9+OR6e1HX/dt+I2WTysMolwn4Wr3rcuAtlOmcrq0FdrD9VWBt/bhDnS/eQ9LjWT+907V9KNMnj6aM7q+2/THKdVBWdr36Q9JDKJv8ftyzbnvk4/1seKTRZR0zWH+Vx32AQ4B59G96caSOfeo01sp6YnwucHM98ukL2w/afg/wG5UrbY4MSO+1/QnbnW48nLbTOGO4iLIz7veSnmz7uj70eSJlVP9lyvLP2ZJ6V5z0ZalTDbGzbB9UT0h+kBK2VwDn2v5xH2p4LWUU/TfAvwLHquxYvJ8yrfOVrmuwfU/P879t/fgyyuHxZZQLT3236zqqGZS/L1P2Gewiabc+9T1yvaRFwP9WvYa97fMlvdz2p4G+XFO/ThmdK+nFNehGBiYXSHpt10suexxNWRW2s6S9KSv3FgBnSJpn+wN9
qgPKwd9lKrt2n0s5Ih7R2VTOdB/Zj1w8aDvKZp53UZ64fkxbYPstlCmbX1J2xq2izJdfSHnx6cehIZR1yyfXFRb71zXdTwduA5ZKulrq/BrDN1OC9O56+xzKi+HNlBO0b+24/xFXqFwT6Ki6YusZlB3OB9UaPrGpL55CP6RsapvH+pHsWcCfUa66+NddF2D7AduvAZ5AGQSJcrnjmV33PQZJOqguv3w4cCYleEfaOmX7bbb/gnJO71+AX9teZ/tUYFFdwdcvkvQp4AzgObb3ro3bsH6QMvWdTvd19gCSdrX983pbA9pYtTPwFNsXj/vg7mqYSTkRd9uotv1s39CH/gU83/YX6jTCq11379bR0+o+1PAFysXHLqIsyV1F2TkK5YV5EfAC23eP/R2mrI4/t92vo4hNkrQj5cjqiZQVIHfXxQyXug+X86g1nATszh9eGFDAn9h+Uz/q6KnnENvL6+0dKUt2+3LRQpXlyNsCP+rdWFX/fnbramHJFhH2EWOR9GbK0d+KkQFA7x95S+pSv3W2O196GsNpuk/jRGxA0lfqnDCUk4FHUFYnAdBa0Ev6pKRvUab6Lu5p/zOVXc/RiC3pBG0ElN2y/1BXgWxPeRvAl/ecslhp+6pBFTcAj7J9IICk0yRtZfs+yvVq+vFuWTEkEvaxpfkVcC5l085dwAo2XOHwNsq1zFvRO0+7LfB1SedRdhYfMJiSYhAS9rHFqDsUf01Zgnsa5VLL1446Cdb1uxENhboy61WUK6CO+DXlwnj/DlzX9UnqGC6Zs48tRt1y/1bKyH4JZTPXEyW9U9IlKu8I9JIBlthvMwFLeobKRbe271lxMl/l8sPRiKzGiS2GpP0pyy1fDXyecumKmcDTbD9O5QqgSzb1PbY0qldUlPQsyh6DZZRLRnyNsky3H29sE0Mg0zixJdmGcmG6kXchuocN3/S92ZGN7csl/ZByzmJbyvviXgok7BuRsI8thu1vSlpCucLmdygXQpsJzFB5c+cZki61/ZxB1tlnD0paCRwH/Knt74/cIel7kmZ41DsmxZYp0zixxaknapdQr5delxo2T9LevWEfbUnYR0Q0IKtxIiIakLCPiGhAwj4iogEJ+4iIBiTsIyIa8P8ARK9ku1GM290AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Text columns plus the topic label, followed by a bar chart of rows per topic.\n",
    "train_multi_sub_df = train_cut_df.loc[:, ['content_id', 'word', 'char', 'subject']]\n",
    "train_multi_sub_df.subject.value_counts().plot.bar()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>content_id</th>\n",
       "      <th>word</th>\n",
       "      <th>char</th>\n",
       "      <th>价格</th>\n",
       "      <th>内饰</th>\n",
       "      <th>动力</th>\n",
       "      <th>外观</th>\n",
       "      <th>安全性</th>\n",
       "      <th>操控</th>\n",
       "      <th>油耗</th>\n",
       "      <th>空间</th>\n",
       "      <th>舒适性</th>\n",
       "      <th>配置</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>01DqbWMwRjfBxLYz</td>\n",
       "      <td>自动 启停 对车 没 好处 ， 等 红灯 挂 N 挡 ， 轻 刹车 。</td>\n",
       "      <td>自 动 启 停 对 车 没 好 处 ， 等 红 灯 挂 N 挡 ， 轻 刹 车 。</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>01X4vSqHci6NPYBy</td>\n",
       "      <td>开 的 2.0 ？ 在 西藏 动力 如何 ？</td>\n",
       "      <td>开 的 2 . 0 ？ 在 西 藏 动 力 如 何 ？</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>02VHu4amZkpAGTb7</td>\n",
       "      <td>常用 配件 价格 不 贵 ， 跟 大众 差不多 。 而且 常用 配件 都 有 备货 ， 如果...</td>\n",
       "      <td>常 用 配 件 价 格 不 贵 ， 跟 大 众 差 不 多 。 而 且 常 用 配 件 都 ...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         content_id                                               word  \\\n",
       "0  01DqbWMwRjfBxLYz                自动 启停 对车 没 好处 ， 等 红灯 挂 N 挡 ， 轻 刹车 。   \n",
       "1  01X4vSqHci6NPYBy                             开 的 2.0 ？ 在 西藏 动力 如何 ？   \n",
       "2  02VHu4amZkpAGTb7  常用 配件 价格 不 贵 ， 跟 大众 差不多 。 而且 常用 配件 都 有 备货 ， 如果...   \n",
       "\n",
       "                                                char  价格  内饰  动力  外观  安全性  操控  \\\n",
       "0          自 动 启 停 对 车 没 好 处 ， 等 红 灯 挂 N 挡 ， 轻 刹 车 。   0   0   0   0    0   1   \n",
       "1                        开 的 2 . 0 ？ 在 西 藏 动 力 如 何 ？   0   0   1   0    0   0   \n",
       "2  常 用 配 件 价 格 不 贵 ， 跟 大 众 差 不 多 。 而 且 常 用 配 件 都 ...   1   0   0   0    0   0   \n",
       "\n",
       "   油耗  空间  舒适性  配置  \n",
       "0   0   0    0   0  \n",
       "1   0   0    0   0  \n",
       "2   0   0    0   0  "
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-hot encode the topic once; the guard keeps a re-run of this cell from\n",
    "# appending the indicator columns a second time.\n",
    "if '价格' not in train_multi_sub_df.columns:\n",
    "    train_multi_sub_df = pd.concat((train_multi_sub_df, pd.get_dummies(train_multi_sub_df['subject'])), axis=1)\n",
    "\n",
    "# Drop the raw string topic before aggregating. Older pandas silently discarded\n",
    "# non-numeric columns in groupby().sum(); newer releases concatenate the strings\n",
    "# and keep the column, which would leave 'subject' ahead of the indicator columns.\n",
    "# errors='ignore' keeps the cell re-runnable once the column is already gone.\n",
    "train_multi_sub_df = train_multi_sub_df.drop(columns=['subject'], errors='ignore')\n",
    "# Collapse to one row per comment; every indicator column becomes a per-comment count.\n",
    "train_multi_sub_df = train_multi_sub_df.groupby(['content_id', 'word', 'char']).sum().reset_index(drop=False)\n",
    "train_multi_sub_df[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['价格', '内饰', '动力', '外观', '安全性', '操控', '油耗', '空间', '舒适性', '配置']"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persist the topic-only training table (the index is written as the first CSV column).\n",
    "train_multi_sub_df.to_csv('../../data/csvs/multi_sub_train.csv')\n",
    "# Every column after content_id / word / char is a topic label; their order\n",
    "# defines the index -> label-name mapping.\n",
    "multi_sub_label_itos = list(train_multi_sub_df.iloc[:, 3:].columns)\n",
    "# Use a context manager so the pickle file is flushed and closed deterministically.\n",
    "with open('../../data/multi_sub_label_itos.pkl', 'wb') as f:\n",
    "    pickle.dump(multi_sub_label_itos, f)\n",
    "multi_sub_label_itos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 0, 2, ..., 0, 0, 1])"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shift every sentiment value up by one and show the result as a NumPy array.\n",
    "train_df['sentiment_value'].add(1).values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# $$二、数据特征处理$$\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.1 低频词过滤"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7fe7c5871c284ad6bb6a09d805e7e9ad",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=12572), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "820fe869850a4950b21b44f857d75a4b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=12572), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Little training data, so low-frequency tokens are not filtered out.\n",
    "def construct_dict(df, d_type='word'):\n",
    "    \"\"\"Count how often each whitespace-separated token occurs in a text column.\n",
    "\n",
    "    Reads df.word when d_type == 'word' and df.char for any other value, and\n",
    "    returns a plain dict mapping token -> occurrence count.\n",
    "    \"\"\"\n",
    "    counts = {}\n",
    "    if d_type == 'word':\n",
    "        corpus = df.word\n",
    "    else:\n",
    "        corpus = df.char\n",
    "    for sentence in tqdm(corpus):  # tqdm only adds a progress bar\n",
    "        # split() without arguments already ignores leading/trailing whitespace\n",
    "        for token in sentence.split():\n",
    "            if token in counts:\n",
    "                counts[token] += 1\n",
    "            else:\n",
    "                counts[token] = 1\n",
    "    return counts\n",
    "# Token frequency tables for the word-level and char-level segmentations.\n",
    "word_dict = construct_dict(train_df, d_type='word')\n",
    "char_dict = construct_dict(train_df, d_type='char')\n",
    "# Tokens whose count is <= 0 are treated as stop words. Every counted token has\n",
    "# a count of at least 1, so both sets are empty and nothing is filtered.\n",
    "word_stop_word = {e for e, cnt in word_dict.items() if cnt <= 0}\n",
    "char_stop_word = {e for e, cnt in char_dict.items() if cnt <= 0}\n",
    "# Context managers make sure both pickle files are flushed and closed.\n",
    "with open('../../data/word_stopword.pkl', 'wb') as f:\n",
    "    pickle.dump(word_stop_word, f)\n",
    "with open('../../data/char_stopword.pkl', 'wb') as f:\n",
    "    pickle.dump(char_stop_word, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2 合适长度选择"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9\t1\t0.0003632401017072285\n",
      "10\t3\t0.001452960406828914\n",
      "11\t59\t0.022884126407555393\n",
      "12\t57\t0.04358881220486742\n",
      "13\t50\t0.06175081729022884\n",
      "14\t62\t0.08427170359607701\n",
      "15\t60\t0.10606610969851071\n",
      "16\t62\t0.12858699600435888\n",
      "17\t63\t0.15147112241191427\n",
      "18\t90\t0.18416273156556484\n",
      "19\t78\t0.21249545949872867\n",
      "20\t70\t0.23792226661823465\n",
      "21\t63\t0.26080639302579006\n",
      "22\t59\t0.28223755902651654\n",
      "23\t49\t0.3000363240101707\n",
      "24\t67\t0.324373410824555\n",
      "25\t49\t0.3421721758082092\n",
      "26\t55\t0.36215038140210676\n",
      "27\t38\t0.37595350526698146\n",
      "28\t34\t0.38830366872502725\n",
      "29\t56\t0.40864511442063206\n",
      "30\t54\t0.4282600799128224\n",
      "31\t38\t0.4420632037776971\n",
      "32\t46\t0.4587722484562296\n",
      "33\t41\t0.47366509262622597\n",
      "34\t31\t0.48492553577915004\n",
      "35\t52\t0.5038140210679259\n",
      "36\t35\t0.516527424627679\n",
      "37\t42\t0.5317835088993825\n",
      "38\t47\t0.5488557936796222\n",
      "39\t37\t0.5622956774427896\n",
      "40\t39\t0.5764620414093715\n",
      "41\t37\t0.5899019251725389\n",
      "42\t34\t0.6022520886305847\n",
      "43\t49\t0.6200508536142388\n",
      "44\t32\t0.6316745368688701\n",
      "45\t30\t0.642571739920087\n",
      "46\t34\t0.6549219033781327\n",
      "47\t41\t0.6698147475481291\n",
      "48\t32\t0.6814384308027603\n",
      "49\t27\t0.6912459135488556\n",
      "50\t26\t0.7006901561932435\n",
      "51\t29\t0.7112241191427531\n",
      "52\t27\t0.7210316018888483\n",
      "53\t32\t0.7326552851434796\n",
      "54\t32\t0.7442789683981108\n",
      "55\t23\t0.7526334907373771\n",
      "56\t26\t0.762077733381765\n",
      "57\t26\t0.7715219760261529\n",
      "58\t26\t0.7809662186705408\n",
      "59\t18\t0.7875045405012709\n",
      "60\t18\t0.794042862332001\n",
      "61\t22\t0.8020341445695601\n",
      "62\t20\t0.8092989466037047\n",
      "63\t19\t0.816200508536142\n",
      "64\t16\t0.8220123501634576\n",
      "65\t12\t0.8263712313839444\n",
      "66\t19\t0.8332727933163817\n",
      "67\t15\t0.8387213948419902\n",
      "68\t13\t0.8434435161641841\n",
      "69\t10\t0.8470759171812564\n",
      "70\t14\t0.8521612786051576\n",
      "71\t13\t0.8568833999273515\n",
      "72\t10\t0.8605158009444238\n",
      "73\t17\t0.8666908826734467\n",
      "74\t11\t0.8706865237922262\n",
      "75\t12\t0.875045405012713\n",
      "76\t15\t0.8804940065383214\n",
      "77\t14\t0.8855793679622226\n",
      "78\t8\t0.8884852887758804\n",
      "79\t9\t0.8917544496912455\n",
      "80\t14\t0.8968398111151467\n",
      "81\t9\t0.9001089720305118\n",
      "82\t9\t0.9033781329458769\n",
      "83\t7\t0.9059208136578275\n",
      "84\t5\t0.9077370141663637\n",
      "85\t7\t0.9102796948783143\n",
      "86\t7\t0.9128223755902649\n",
      "87\t4\t0.9142753359970939\n",
      "88\t8\t0.9171812568107517\n",
      "89\t6\t0.919360697420995\n",
      "90\t5\t0.9211768979295312\n",
      "91\t12\t0.925535779150018\n",
      "92\t9\t0.9288049400653831\n",
      "93\t7\t0.9313476207773337\n",
      "94\t5\t0.9331638212858698\n",
      "95\t9\t0.9364329822012349\n",
      "96\t6\t0.9386124228114783\n",
      "97\t5\t0.9404286233200144\n",
      "98\t6\t0.9426080639302578\n",
      "99\t4\t0.9440610243370867\n",
      "100\t5\t0.9458772248456229\n",
      "101\t4\t0.9473301852524518\n",
      "102\t2\t0.9480566654558663\n",
      "103\t4\t0.9495096258626953\n",
      "104\t2\t0.9502361060661098\n",
      "105\t6\t0.9524155466763531\n",
      "106\t3\t0.9535052669814748\n",
      "107\t3\t0.9545949872865964\n",
      "108\t3\t0.9556847075917181\n",
      "109\t3\t0.9567744278968398\n",
      "110\t3\t0.9578641482019614\n",
      "111\t3\t0.9589538685070831\n",
      "112\t6\t0.9611333091173264\n",
      "113\t2\t0.9618597893207409\n",
      "114\t2\t0.9625862695241554\n",
      "115\t4\t0.9640392299309843\n",
      "116\t1\t0.9644024700326915\n",
      "117\t3\t0.9654921903378132\n",
      "118\t5\t0.9673083908463493\n",
      "119\t3\t0.968398111151471\n",
      "120\t6\t0.9705775517617143\n",
      "122\t2\t0.9713040319651288\n",
      "123\t1\t0.971667272066836\n",
      "124\t1\t0.9720305121685432\n",
      "125\t1\t0.9723937522702504\n",
      "126\t4\t0.9738467126770793\n",
      "127\t2\t0.9745731928804938\n",
      "128\t1\t0.974936432982201\n",
      "129\t2\t0.9756629131856155\n",
      "131\t1\t0.9760261532873227\n",
      "132\t3\t0.9771158735924443\n",
      "133\t3\t0.978205593897566\n",
      "135\t2\t0.9789320741009805\n",
      "137\t1\t0.9792953142026877\n",
      "138\t1\t0.9796585543043949\n",
      "139\t1\t0.980021794406102\n",
      "142\t2\t0.9807482746095165\n",
      "144\t1\t0.9811115147112237\n",
      "145\t1\t0.9814747548129309\n",
      "146\t1\t0.9818379949146381\n",
      "147\t2\t0.9825644751180526\n",
      "148\t2\t0.983290955321467\n",
      "149\t1\t0.9836541954231742\n",
      "150\t2\t0.9843806756265887\n",
      "151\t4\t0.9858336360334177\n",
      "153\t1\t0.9861968761351249\n",
      "155\t3\t0.9872865964402465\n",
      "156\t1\t0.9876498365419537\n",
      "158\t2\t0.9883763167453682\n",
      "159\t1\t0.9887395568470754\n",
      "161\t2\t0.9894660370504899\n",
      "163\t2\t0.9901925172539043\n",
      "165\t1\t0.9905557573556115\n",
      "166\t1\t0.9909189974573187\n",
      "167\t1\t0.9912822375590259\n",
      "168\t3\t0.9923719578641476\n",
      "170\t1\t0.9927351979658547\n",
      "171\t2\t0.9934616781692692\n",
      "173\t1\t0.9938249182709764\n",
      "174\t1\t0.9941881583726836\n",
      "176\t1\t0.9945513984743908\n",
      "177\t3\t0.9956411187795124\n",
      "178\t1\t0.9960043588812196\n",
      "179\t1\t0.9963675989829268\n",
      "182\t2\t0.9970940791863413\n",
      "185\t1\t0.9974573192880485\n",
      "187\t1\t0.9978205593897557\n",
      "190\t1\t0.9981837994914629\n",
      "192\t1\t0.99854703959317\n",
      "195\t1\t0.9989102796948772\n",
      "199\t3\t0.9999999999999989\n"
     ]
    }
   ],
   "source": [
    "# 1000长度以内可以随意选，这里选最长的\n",
    "# word单句合适长度 128\n",
    "# char单句合适长度 200\n",
    "def check_len(df, type='word'):\n",
    "    s_dct = {}\n",
    "    corpus = df.word if type == 'word' else df.char\n",
    "    for s in corpus:\n",
    "        s = [e for e in s.split() if e not in char_stop_word]\n",
    "        s_dct[len(s)] = s_dct.get(len(s), 0) + 1\n",
    "    l_lst = sorted(s_dct.items(), key=lambda x:x[0], reverse=False)\n",
    "    sum(s_dct.values())\n",
    "    p = 0.\n",
    "    for l in l_lst:\n",
    "        p += l[1] / sum(s_dct.values())\n",
    "        print(str(l[0]) + '\\t' + str(l[1]) + '\\t' + str(p))\n",
    "check_len(test_cut_df, 'char')\n",
    "# check_len(test_cut_df, 'char')\n",
    "# check_len(train_df_final, 'char')\n",
    "# check_len(test_cut_df, 'char')\n",
    "# len(train_cut_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 316,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# 按句子分合适长度\n",
    "# max_sent = 20\n",
    "# max_word_len = 40\n",
    "# max_char_len = 70\n",
    "def check_lens(df):\n",
    "    max_word = {}\n",
    "    max_word_len = {}\n",
    "    max_char = {}\n",
    "    max_char_len = {}\n",
    "    word = df.word\n",
    "    char = df.char\n",
    "    for w in word:\n",
    "        sents = re.split(r\" 。 | ， \", w.strip())\n",
    "        max_word[len(sents)] = max_word.get(len(sents), 0) + 1\n",
    "        for t in sents:\n",
    "            max_word_len[len(t.split())] = max_word_len.get(len(t.split()), 0) + 1\n",
    "    for c in char:\n",
    "        sents = re.split(r\" 。 | ， \", c.strip())\n",
    "        max_char[len(sents)] = max_char.get(len(sents), 0) + 1\n",
    "        for t in sents:\n",
    "            max_char_len[len(t.split())] = max_char_len.get(len(t.split()), 0) + 1\n",
    "    s_dct = max_char\n",
    "    s_dct = max_word_len\n",
    "    s_dct = max_char_len\n",
    "    l_lst = sorted(s_dct.items(), key=lambda x:x[0], reverse=False)\n",
    "    p = 0.\n",
    "    for l in l_lst:\n",
    "        p += l[1] / sum(s_dct.values())\n",
    "        print(str(l[0]) + '\\t' + str(l[1]) + '\\t' + str(p))\n",
    "#     print(max_word, max_word_len, max_char, max_char_len)\n",
    "    \n",
    "# check_lens(train_df_final)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.3 Word2vec模型训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6bc6475cd258419db9a25cf30a2e54c1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=15325), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集语料: 15325\n",
      "总长度:  15325\n",
      "保存模型...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4d1c49ee73334940914556a146ef305f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=2352), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8a7b0e42f9ae4dec92f4979a663f8a59",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=15325), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集语料: 15325\n",
      "总长度:  15325\n",
      "保存模型...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d7ba2f809cbd4a41affd34e8eeef00e8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=8796), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OK\n"
     ]
    }
   ],
   "source": [
    "# 训练词向量\n",
    "def train_w2v_model(type='char', min_freq=2, size=100):\n",
    "    sentences = []\n",
    "\n",
    "    if type == 'char':\n",
    "        corpus = np.concatenate((train_cut_df['char'], test_cut_df['char']))\n",
    "    elif type == 'word':\n",
    "        corpus = np.concatenate((train_cut_df['word'], test_cut_df['word']))\n",
    "    for e in tqdm(corpus):\n",
    "        sentences.append([i for i in e.strip().split() if i])\n",
    "    print('训练集语料:', len(corpus))\n",
    "    print('总长度: ', len(sentences))\n",
    "    model = Word2Vec(sentences, size=size, window=5, min_count=min_freq)\n",
    "    model.itos = {}\n",
    "    model.stoi = {}\n",
    "    model.embedding = {}\n",
    "    item_to_id = {}\n",
    "    print('保存模型...')\n",
    "    \n",
    "    for k in tqdm(sorted(list(model.wv.vocab.keys()), reverse=True)):\n",
    "        item_to_id[k] = model.wv.vocab[k].index\n",
    "    os.makedirs('../../data/word2vec_models/', exist_ok=True)\n",
    "    model.wv.save_word2vec_format('../../data/word2vec_models/word2vec.{}.{}d.model.txt'.format(type, size), binary=False)\n",
    "    pickle.dump(item_to_id, open('../../data/{}_item_to_id.pkl'.format(type), 'wb'))\n",
    "    words = model.wv.vocab\n",
    "    with open('../../data/word2vec_models/word2vec.{}.{}d.vocab.txt'.format(type, size, min_freq),'w') as f:\n",
    "        f.write('<S>\\n</S>\\n<UNK>\\n')  # bilm-tf 要求vocab有这三个符号，并且在最前面\n",
    "        for word in words:\n",
    "            f.write(word+'\\n')\n",
    "\n",
    "    return model\n",
    "# 比赛数据\n",
    "model = train_w2v_model(type='char', min_freq=3, size=300)\n",
    "model = train_w2v_model(type='word', min_freq=3, size=300)\n",
    "# train_df[:3]\n",
    "print('OK')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 ...\n",
      "1    四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 ...\n",
      "2    斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 ...\n",
      "3    这 玩 意 都 是 给 有 钱 任 性 又 不 懂 车 的 土 豪 用 的 ， 这 价 格 ...\n",
      "4              1 7 价 格 忒 高 ， 估 计 也 就 是 1 4 - 1 5 左 右 。\n",
      "Name: char, dtype: object\n",
      "0    X V 新 款 低 配 比 以 前 低 配 配 置 方 面 高 了 好 多 ， 森 林 人 ...\n",
      "1    助 力 跟 着 发 动 机 走 ？ 很 奇 葩 啊 ！ 最 起 码 不 是 隔 着 减 速 ...\n",
      "2      只 有 报 警 ， 如 果 你 想 检 验 一 下 可 以 把 一 只 轮 胎 放 点 气 。\n",
      "3    不 太 好 判 断 你 描 绘 的 声 音 ， 后 座 安 全 带 卡 子 也 时 常 发 ...\n",
      "4                                前 雷 达 撞 了 感 觉 作 用 不 大\n",
      "Name: char, dtype: object\n",
      "['当 然 外 观 和 内 饰 了', '提 速 很 不 错 ， 在 路 上 就 是 一 亮 点', '操 控 好 ， 越 来 越 顺 手', '操 控 很 舒 服', '整 车 的 乘 坐 舒 适 度']\n",
      "['因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 换 代 的 车 型 上 ， 因 为 肯 定 会 影 响 价 格 。'\n",
      " '四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 实 车 前 脸 有 点 违 和 感 。 不 过 大 众 的 车 应 该 不 会 差 。'\n",
      " '斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 些 ， 用 料 完 全 一 样 。 我 听 说 过 野 帝 ， 但 没 听 说 过 你 说 这 车 。'\n",
      " '这 玩 意 都 是 给 有 钱 任 性 又 不 懂 车 的 土 豪 用 的 ， 这 价 格 换 一 次 我 妹 夫 E P 0 2 0 可 以 换 三 锅 了'\n",
      " '1 7 价 格 忒 高 ， 估 计 也 就 是 1 4 - 1 5 左 右 。']\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8faf46b5caca4f148b2ccad2a1ffa6e6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=85325), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 换 代 的 车 型 上 ， 因 为 肯 定 会 影 响 价 格 。\n",
      "0    因 为 森 林 人 即 将 换 代 ， 这 套 系 统 没 必 要 装 在 一 款 即 将 ...\n",
      "1    四 驱 价 格 貌 似 挺 高 的 ， 高 的 可 以 看 齐 X C 6 0 了 ， 看 ...\n",
      "2    斯 柯 达 要 说 质 量 ， 似 乎 比 大 众 要 好 一 点 ， 价 格 也 低 一 ...\n",
      "3    这 玩 意 都 是 给 有 钱 任 性 又 不 懂 车 的 土 豪 用 的 ， 这 价 格 ...\n",
      "4              1 7 价 格 忒 高 ， 估 计 也 就 是 1 4 - 1 5 左 右 。\n",
      "Name: char, dtype: object\n",
      "0    X V 新 款 低 配 比 以 前 低 配 配 置 方 面 高 了 好 多 ， 森 林 人 ...\n",
      "1    助 力 跟 着 发 动 机 走 ？ 很 奇 葩 啊 ！ 最 起 码 不 是 隔 着 减 速 ...\n",
      "2      只 有 报 警 ， 如 果 你 想 检 验 一 下 可 以 把 一 只 轮 胎 放 点 气 。\n",
      "3    不 太 好 判 断 你 描 绘 的 声 音 ， 后 座 安 全 带 卡 子 也 时 常 发 ...\n",
      "4                                前 雷 达 撞 了 感 觉 作 用 不 大\n",
      "Name: char, dtype: object\n",
      "['当 然 外 观 和 内 饰 了', '提 速 很 不 错 ， 在 路 上 就 是 一 亮 点', '操 控 好 ， 越 来 越 顺 手', '操 控 很 舒 服', '整 车 的 乘 坐 舒 适 度']\n",
      "['因为 森林 人 即将 换代 ， 这套 系统 没 必要 装在 一款 即将 换代 的 车型 上 ， 因为 肯定 会 影响 价格 。'\n",
      " '四驱 价格 貌似 挺 高 的 ， 高 的 可以 看齐 XC60 了 ， 看实车 前 脸 有点 违和感 。 不过 大众 的 车 应该 不会 差 。'\n",
      " '斯柯达 要说 质量 ， 似乎 比 大众 要 好 一点 ， 价格 也 低 一些 ， 用料 完全 一样 。 我 听说 过野帝 ， 但 没听说过 你 说 这车 。'\n",
      " '这 玩意 都 是 给 有钱 任性 又 不 懂车 的 土豪 用 的 ， 这 价格 换 一次 我 妹夫 EP020 可以 换 三锅 了'\n",
      " '17 价格 忒 高 ， 估计 也 就是 14 - 15 左右 。']\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fcf76a42510f4405ad6adaacfe8fb26b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=85325), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "因为 森林 人 即将 换代 ， 这套 系统 没 必要 装在 一款 即将 换代 的 车型 上 ， 因为 肯定 会 影响 价格 。\n",
      "OK\n"
     ]
    }
   ],
   "source": [
    "# 加入汽车之家数据\n",
    "# 训练词向量\n",
    "def train_w2v_model(type='char', min_freq=2, size=100):\n",
    "    sentences = []\n",
    "\n",
    "    print(train_cut_df['char'][:5])\n",
    "    print(test_cut_df['char'][:5])\n",
    "    print(char_corpus[:5])\n",
    "    if type == 'char':\n",
    "        corpus = np.concatenate((train_cut_df['char'], test_cut_df['char'], char_corpus))\n",
    "    elif type == 'word':\n",
    "        corpus = np.concatenate((train_cut_df['word'], test_cut_df['word'], word_corpus))\n",
    "    print(corpus[:5])\n",
    "    for e in tqdm(corpus):\n",
    "        print(e)\n",
    "        return\n",
    "        sentences.append([i for i in e.strip().split() if i])\n",
    "    print('训练集语料:', len(corpus))\n",
    "    print('总长度: ', len(sentences))\n",
    "    model = Word2Vec(sentences, size=size, window=5, min_count=min_freq)\n",
    "    model.itos = {}\n",
    "    model.stoi = {}\n",
    "    model.embedding = {}\n",
    "    item_to_id = {}\n",
    "    print('保存模型...')\n",
    "\n",
    "    for k in tqdm(sorted(list(model.wv.vocab.keys()), reverse=True)):\n",
    "        item_to_id[k] = model.wv.vocab[k].index\n",
    "    os.makedirs('../../data/word2vec_models/', exist_ok=True)\n",
    "    model.wv.save_word2vec_format('../../data/word2vec_models/word2vec.{}.{}d.model.cars-home.txt'.format(type, size, min_freq), binary=False)\n",
    "    pickle.dump(item_to_id, open('../../data/{}_item_to_id.cars-home.pkl'.format(type), 'wb'))\n",
    "    return model\n",
    "\n",
    "# with open('../../data/data_cars_home/all_text.txt') as f:\n",
    "#     corpus = [''.join(line.strip().split(' ')[1:]) for line in f]\n",
    "#     word_corpus = [' '.join(jieba.cut(s, cut_all=False)) for s in tqdm(corpus)]\n",
    "#     char_corpus = [' '.join(list(s)) for s in tqdm(corpus)]\n",
    "\n",
    "\n",
    "model = train_w2v_model(type='char', min_freq=3, size=300)\n",
    "model = train_w2v_model(type='word', min_freq=3, size=300)\n",
    "# train_df[:3]\n",
    "print('OK')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 2.4 glove模型训练数据准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def data_prepare(type='word'):\n",
    "    sentences = []\n",
    "    if type == 'char':\n",
    "        corpus = train_df_final['char']\n",
    "    elif type == 'word':\n",
    "        corpus = train_df_final['word']\n",
    "    for e in tqdm(corpus):\n",
    "        sentences.append(e)\n",
    "    with open('../../data/{}.glove.txt'.format(type), 'w') as f:\n",
    "        f.write(' '. join(sentences))\n",
    "# data_prepare('word')\n",
    "# data_prepare('char')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.4 TF-idf 特征训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data3/jzzhou/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
      "of pandas will change to not sort by default.\n",
      "\n",
      "To accept the future behavior, pass 'sort=True'.\n",
      "\n",
      "To retain the current behavior and silence the warning, pass sort=False\n",
      "\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "tfidf_df = pd.concat((train_A_df, train_B_df, test_A1_df, test_A2_df, test))\n",
    "tfidf_df.to_csv('../../data/csvs/tfidf.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "256"
      ]
     },
     "execution_count": 195,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 训练tf-idf特征\n",
    "os.system('python train_lsa.py char 250')\n",
    "# os.system('python train_lsa.py word 250')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "# $$下面的不要运行了$$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "# $$三、数据增强(option)$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'train_df' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-95-a1ca3a4cc286>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     18\u001b[0m                 \u001b[0menhance_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0menhance_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'article'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mb_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'word_seg'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0ma_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'class'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'c_numerical'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mc_n\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     19\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0menhance_df\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0menhance_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun_enhance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     21\u001b[0m \u001b[0menhance_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../../data/Enhance.train.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     22\u001b[0m \u001b[0menhance_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mfontsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrot\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-95-a1ca3a4cc286>\u001b[0m in \u001b[0;36mrun_enhance\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mrun_enhance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m     \u001b[0mmax_len\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m     \u001b[0menhance_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'article'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'word_seg'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'class'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'c_numerical'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'train_df' is not defined"
     ]
    }
   ],
   "source": [
    "# 随机数添加数据\n",
    "import random\n",
    "\n",
    "def run_enhance():\n",
    "    max_len = train_df['class'].value_counts().values[0]\n",
    "    enhance_df = train_df.copy()[['article', 'word_seg', 'class', 'c_numerical']]\n",
    "    \n",
    "    for c in tqdm(enhance_df['class'].value_counts().index):\n",
    "        c_data = enhance_df[enhance_df['class'] == c]\n",
    "        if len(c_data) * 2 < max_len:\n",
    "            for a, b, c_n in zip(c_data['word_seg'].values, c_data['article'].values, c_data['c_numerical'].values):\n",
    "                a_lst = a.split()\n",
    "                b_lst = b.split()\n",
    "                random.shuffle(a_lst)\n",
    "                random.shuffle(b_lst)\n",
    "                a_str = ' '.join(a_lst)\n",
    "                b_str = ' '.join(b_lst)\n",
    "                enhance_df.loc[enhance_df.shape[0]+1] = {'article': b_str, 'word_seg': a_str, 'class': c, 'c_numerical':c_n}\n",
    "    return enhance_df\n",
    "enhance_df = run_enhance()\n",
    "enhance_df.to_csv('../../data/Enhance.train.csv')\n",
    "enhance_df['class'].value_counts().plot.bar(figsize=(10,6), fontsize=20, rot=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# 卡方统计量提取数据\n",
    "chi_dic = {}\n",
    "enhance_df[:1]\n",
    "\n",
    "N = len(train_df)\n",
    "\n",
    "all_word = set((' '.join(train_df['word_seg'])).split())\n",
    "all_char = set((' '.join(train_df['article'])).split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "def Feature_Select(df, mode='chi', feature='word', num=1000):  #  chi mi ig df ts\n",
    "    all_text = []\n",
    "    label_set = set()  # 'corpus_txt'\n",
    "    feature = 'word_seg' if feature == 'word' else 'article'\n",
    "    for idx, line in df.iterrows():\n",
    "        data = line[feature].strip().split()\n",
    "        all_text.append((line['c_numerical'], data))\n",
    "        label_set.add(line['c_numerical'])\n",
    "    categories = list(sorted(label_set))  # 排序，确保每次生成的类别列表顺序唯一\n",
    "    cat_to_id = dict(zip(categories, range(len(categories))))\n",
    "    print(\"类别列表id：\", cat_to_id)\n",
    "\n",
    "    word_text_map = {}\n",
    "    label_num = [0] * len(categories)\n",
    "    for text in all_text:\n",
    "        label_i = cat_to_id[text[0]]\n",
    "        label_num[label_i] += 1\n",
    "\n",
    "        t = sorted(set(text[1]))  # 去重，并保持原来顺序，不保持原序结果不一样\n",
    "        for w in t:\n",
    "            if w not in word_text_map:\n",
    "                word_text_map[w] = [0] * len(categories)\n",
    "                word_text_map[w][label_i] += 1\n",
    "            else:\n",
    "                word_text_map[w][label_i] += 1\n",
    "\n",
    "    N = len(all_text)\n",
    "    print('文本总数量：', N)\n",
    "\n",
    "    word_text_list = []\n",
    "    word_list = []\n",
    "    for k in word_text_map:\n",
    "        word_list.append(k)\n",
    "        word_text_list.append(word_text_map[k])\n",
    "    \n",
    "    A_array = np.array(word_text_list, dtype=np.float)  # A\n",
    "    sign_array = np.sign(A_array)  # 当A=0时，chi=0\n",
    "    word_num_each_label = np.sum(sign_array, 0)\n",
    "    print('每个类别的词数量：', word_num_each_label)\n",
    "    label_num = np.array(label_num, dtype=np.float)\n",
    "    B_array = np.reshape(np.sum(word_text_list, 1), (-1, 1)) - A_array\n",
    "    C_array = label_num - A_array\n",
    "    D_array = N - A_array - B_array - C_array\n",
    "    word_num = len(word_list)\n",
    "    print('总词数：', word_num)\n",
    "\n",
    "    def chi_label():\n",
    "        # N * (AD - BC)^2  /  ((A+C)(B+D) (A+B)(C+D))   # 简化版 对于同一个类别：(AD - BC)^2  /  ((A+B)(C+D))\n",
    "        chi_square_value = (A_array*D_array - B_array*C_array) ** 2 / ((A_array+B_array) * (C_array+D_array))\n",
    "        # chi_square_value = (A_array*D_array - B_array*C_array) ** 2 * N / (\n",
    "        #                         (A_array+C_array)*(B_array+D_array)*(A_array+B_array)*(C_array+D_array))\n",
    "        chi_square_value = sign_array * chi_square_value\n",
    "        \n",
    "        feature_word = {}\n",
    "        for i in range(len(categories)):\n",
    "            cur_chi = chi_square_value[:, i]\n",
    "            order = np.argsort(cur_chi)  # 返回排序后的索引，正向排序\n",
    "            # print(word_num_each_label[i])\n",
    "            min_n = min(num, int(word_num_each_label[i]))\n",
    "            # top_80 = int(word_num_each_label[i] * 0.5)\n",
    "            for i in order[-min_n:]:\n",
    "                feature_word[i] = 1\n",
    "        chose_w = [word_list[i] for i in feature_word]\n",
    "        print('提取的特征词的数量：', len(chose_w))\n",
    "        return chose_w\n",
    "    \n",
    "    def chi_mode(mode):\n",
    "        # N * (AD - BC)^2  /  ((A+C)(B+D) (A+B)(C+D))   \n",
    "        chi_square_value = (A_array*D_array - B_array*C_array) ** 2 * N / (\n",
    "                                (A_array+C_array)*(B_array+D_array)*(A_array+B_array)*(C_array+D_array))\n",
    "        chi_square_value = sign_array * chi_square_value\n",
    "        \n",
    "        if mode == 'max': chi_square_value = np.max(chi_square_value, axis=1).reshape((-1, 1))\n",
    "        elif mode == 'avg': chi_square_value = np.mean(chi_square_value, axis=1).reshape((-1, 1))\n",
    "        else: raise ValueError(\"参数有误\")\n",
    "\n",
    "        order = np.argsort(chi_square_value[:, 0])  # 返回排序后的索引，正向排序\n",
    "        chose_w = [word_list[i] for i in order[-num:]]\n",
    "        return chose_w\n",
    "    return chi_label()\n",
    "w_num = 100000\n",
    "c_num = 20000\n",
    "w = Feature_Select(train_df, num=num, feature='word')\n",
    "c = Feature_Select(train_df, num=num, feature='char')\n",
    "pickle.dump(w, open('../../data/chi_words_{}.pkl'.format(w_num), 'wb'))\n",
    "pickle.dump(c, open('../../data/chi_char_{}.pkl'.format(c_num), 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "pred1 = pickle.load(open('../../data/result-op5/lstmgruword_oof_f1_0.7686691377795438_a0.7832944028205417.pkl', 'rb'))\n",
    "pred2 = pickle.load(open('../../backup/result-op3-0907/svmword_pre_f1_0.7755615966138534_a0.7900114681760386.pkl', 'rb'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def get_noise(pred):\n",
    "    lines = []\n",
    "    for i, p in enumerate(tqdm(pred)):\n",
    "        top2 = np.sort(p)[-2:]\n",
    "        if top2[0] >= 0.15 and top2[1] <= 0.7:\n",
    "            lines.append(i)\n",
    "    return lines\n",
    "line1 = set(get_noise(pred1))\n",
    "line2 = set(get_noise(pred2))\n",
    "print(len(line1))\n",
    "print(len(line2))\n",
    "cross_noise = line1 & line2\n",
    "cross_noise = list(cross_noise)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "# $$四、LM(option)$$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "## Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# import torchtext\n",
    "# from torchtext import data, vocab\n",
    "# from torchtext.datasets import language_modeling\n",
    "\n",
    "# from fastai.rnn_reg import *  # 各种drop\n",
    "# from fastai.rnn_train import *\n",
    "# from fastai.nlp import *  # LMData\n",
    "# from fastai.lm_rnn import *  # 各种模型\n",
    "from fastai.text import *\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "BOS = 'xbos'  # beginning-of-sentence tag\n",
    "FLD = 'xfld'  # data field tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100%|██████████| 20000/20000 [00:02<00:00, 7939.87it/s]\n",
      "100%|██████████| 5000/5000 [00:00<00:00, 54543.31it/s]\n"
     ]
    }
   ],
   "source": [
    "def get_corpu(type='word'):\n",
    "    sentences = []\n",
    "    val_sents = []\n",
    "    if type == 'char':\n",
    "#         corpus = pd.concat((train_A1_df['char'], train_B1_df['char'], test_A1_df['char'], test_A2_df['char'], w2v_df['char']))\n",
    "        corpus = train_B1_df['char']\n",
    "        val_cor = test_B1_df['char']\n",
    "    elif type == 'word':\n",
    "#         corpus = pd.concat((train_A1_df['word'],train_B1_df['word'], test_A1_df['word'], test_A2_df['word'], w2v_df['word']))\n",
    "        corpus = train_B1_df['word']\n",
    "        val_cor = test_B1_df['word']\n",
    "    for e in tqdm(corpus):\n",
    "        sentences.append((f'\\n {BOS} {FLD} 1 ' + e).split(' '))\n",
    "    for e in tqdm(val_cor):\n",
    "        val_sents.append((f'\\n {BOS} {FLD} 1 ' + e).split(' '))\n",
    "    return sentences, val_sents\n",
    "tok_trn, tok_val = get_corpu('word')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "freq = Counter(p for o in tok_trn for p in o)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 446,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('。', 363133), ('的', 205775), ('我', 169351), ('您', 168925), ('是', 126523)]"
      ]
     },
     "execution_count": 446,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "freq.most_common(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26259"
      ]
     },
     "execution_count": 193,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(freq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "max_vocab = 60000\n",
    "min_freq = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "itos = [o for o,c in freq.most_common(max_vocab) if c>min_freq]\n",
    "itos.insert(0, '_pad_')\n",
    "itos.insert(0, '_unk_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10363"
      ]
     },
     "execution_count": 201,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stoi = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itos)})\n",
    "len(stoi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "trn_lm = np.array([[stoi[o] for o in p] for p in tok_trn])\n",
    "val_lm = np.array([[stoi[o] for o in p] for p in tok_val])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 445,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10363, 20000)"
      ]
     },
     "execution_count": 445,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vs = len(itos)\n",
    "vs, len(trn_lm)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "## Embedding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Vocabulaty size :  10363\n",
      "create embedding matrix\n",
      "(37865, 300)\n",
      "100%|██████████| 10363/10363 [00:00<00:00, 376289.46it/s]\n"
     ]
    }
   ],
   "source": [
    "# Load pretrained word vectors and build the initial embedding matrix.\n",
    "\n",
    "def init_embedding(item_to_id, model_file):\n",
    "    \"\"\"Build an embedding matrix for a vocabulary from a word2vec text file.\n",
    "\n",
    "    Args:\n",
    "        item_to_id: dict mapping each token to its row index. It must be\n",
    "            one-to-one, with indices 0..len(item_to_id)-1; a defaultdict that\n",
    "            gained extra keys through lookups of unknown tokens would inflate\n",
    "            the matrix and overwrite row 0.\n",
    "        model_file: path to a text file whose first line is a header (skipped)\n",
    "            and whose other lines are a word followed by space-separated floats.\n",
    "\n",
    "    Returns:\n",
    "        Array of shape (len(item_to_id), dim), where dim is the vector size found\n",
    "        in model_file. Rows of tokens present in the file hold the pretrained\n",
    "        vector; all other rows keep their random initialisation, drawn from a\n",
    "        normal distribution with the mean and std of all pretrained values.\n",
    "    \"\"\"\n",
    "    vocab_len = len(item_to_id)\n",
    "    print('Vocabulaty size : ', vocab_len)\n",
    "    print('create embedding matrix')\n",
    "\n",
    "    def get_coefs(word, *arr): \n",
    "        return word, np.asarray(arr, dtype='float32')\n",
    "\n",
    "    # Explicit encoding so the Chinese tokens decode the same way on every platform;\n",
    "    # the context manager closes the file once the vectors are loaded.\n",
    "    with open(model_file, encoding='utf-8') as vec_file:\n",
    "        next(vec_file, None)  # skip the header line\n",
    "        embeddings_index = dict(get_coefs(*line.rstrip().split(' ')) for line in vec_file)\n",
    "\n",
    "    # np.stack needs a real sequence; a dict view is rejected by newer NumPy.\n",
    "    all_embs = np.stack(list(embeddings_index.values()))\n",
    "    print(all_embs.shape)\n",
    "    # Take the vector size from the loaded vectors instead of the global em_sz,\n",
    "    # so this cell does not depend on a variable assigned in a later cell.\n",
    "    embed_matrix = np.random.normal(all_embs.mean(), all_embs.std(), size=(vocab_len, all_embs.shape[1]))\n",
    "\n",
    "    for word, i in tqdm(item_to_id.items()):\n",
    "        embedding_vector = embeddings_index.get(word)\n",
    "        if embedding_vector is not None:\n",
    "            embed_matrix[i] = embedding_vector\n",
    "    return embed_matrix\n",
    "\n",
    "# Pass an exact token -> index mapping derived from itos, so the matrix always has\n",
    "# len(itos) rows even if stoi picked up extra keys from unknown-token lookups.\n",
    "new_w = init_embedding({tok: idx for idx, tok in enumerate(itos)}, '../../data/word2vec_models/word2vec.word.300d.mfreq2.model.txt')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Entries that a later cell merges into the language model's state dict\n",
    "# (model_dict.update(wgts)). The first tensor is built from new_w itself, the\n",
    "# other two from separate copies of it.\n",
    "wgts = {\n",
    "    '0.encoder.weight': T(new_w),\n",
    "    '0.encoder_with_dropout.embed.weight': T(np.copy(new_w)),\n",
    "    '1.decoder.weight': T(np.copy(new_w)),\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "## Language model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 548,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "wd=1e-7\n",
    "bptt=70\n",
    "bs=52\n",
    "em_sz = 300  # size of each embedding vector\n",
    "nh = 1150     # number of hidden activations per layer\n",
    "nl = 3       # number of layers\n",
    "PATH = '../../data/lm'\n",
    "opt_fn = partial(optim.Adam, betas=(0.8, 0.99))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 549,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5000"
      ]
     },
     "execution_count": 549,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(val_lm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 550,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "trn_dl = LanguageModelLoader(np.concatenate(trn_lm), bs, bptt)\n",
    "val_dl = LanguageModelLoader(np.concatenate(val_lm), bs, bptt)\n",
    "\n",
    "md = LanguageModelData(PATH, 1, vs, trn_dl, val_dl, bs=bs, bptt=bptt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 551,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15])*0.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 552,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner = md.get_model(opt_fn, em_sz, nh, nl, \n",
    "    dropouti=drops[0], dropout=drops[1], wdrop=drops[2], dropoute=drops[3], dropouth=drops[4])\n",
    "\n",
    "learner.metrics = [accuracy]\n",
    "learner.freeze_to(-1)\n",
    "# 更新权重\n",
    "model_dict = learner.model.state_dict()\n",
    "model_dict.update(wgts)\n",
    "learner.model.load_state_dict(model_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 554,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner.lr_find()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 474,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEMCAYAAAAoB2Y1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XmcXFWZ//HPU9X7kt6z72SDhCAQSEIIIIERRdxmUAEdESHIuAGuzOioP8eZcRnHBTdExAUUXBE14hAS2RKSDpuJEEJCErL2lvS+9/P7oypJE9Lp6k5X3Vq+79crr7vUrXuek+5+6tS5955j7o6IiGSGUNABiIhI4ijpi4hkECV9EZEMoqQvIpJBlPRFRDKIkr6ISAZR0hcRySBK+iIiGURJX0Qkgyjpi4hkkKygAzhaZWWlT506NegwRJLb5s2R5ezZwcYhSWPDhg117l412HFJl/SnTp1KdXV10GGIJLcLLogsV68OMgpJIma2I5bj1L0jIpJBlPRFRDKIkr6ISAZR0hcRySBK+iIiGURJX0Qkgyjpi4gkgfuf2cNze5viXk5ckr6ZZZvZ/dH1QjO7z8weM7Mvx6M8EZFU5u7cfO/T/P6ZPXEva8STvpnlAxuAi6O7rgLWuvsSYK6ZnTzSZYqIpLK2rl66e53S/Oy4lzXiSd/d2919PrAruusgUGRmYSAf6BrpMkVEUlljezcAJamY9I/ht8AlwFbgOXffevQBZrbczKrNrLq2tjYBIYmIJI+DbemV9G8BvuvuU4FyMzvn6APc/TZ3X+DuC6qqBh0vSEQkrRxu6RekR9IvBjqi651AUQLKFBFJGY3tkV7vdGnpfxu4wczWEOnTX5mAMkVEUsahln5pQU7cy4rb0MruPiO63A4siVc5IiKpLt369EVE5Dga27vJChmFOeG4l6WkLyISsIPt3ZTkZ2NmcS9LSV9EJGCN7d0JuXMHlPRFRALXFG3pJ4KSvohIwA62dSdkCAZQ0hcRCVyjWvoiIpnjYFuXkr6ISCbo7XOaO3soScCDWaCkLyISqOaObtwT82AWKOmLiATq8BAMSvoiIukvkUMwgJK+iEigjgy2pqQvIpL2DiZw1ixQ0hcRCVQiJ1ABJX0RkUA1qaUvIpI5DrZ1kZ8dJjcr/sMqg5K+iEigEjkEAyjpi4gE6mCbkr6ISMZI5Fj6oKQvIhIode+IiGSQxvbEjaUPSvoiIoFKiz59M8s2s/v7bX/CzNaa2QozS8z4oSIiSa6zp5f27t6EDcEAcUj6ZpYPbAAujm5PB+a6+yJgBTBxpMsUEUlFjQl+MAvikPTdvd3d5wO7oruWAWVm9jCwFHhppMsUEUlFh5/GTdAEKpCYPv0qoNbdzyPSyj/36APMbLmZVZtZdW1tbQJCEhEJXqKHVYbEJP0mYHN0fRsw4egD3P02d1/g7guqqqoSEJKISPAaWrsAKE+zlv4GYEF0fQaRxC8ikvEOJ/2iNEr67r4GqDez9cBmd18X7zJFRFJBfQAt/ax4ndjdZ/RbvyFe5YiIpKqG1sgIm/k5iRlhE/RwlohIYA60dlFemNhHl5T0RUQCUt/aRUUC+/NBSV9EJDANaumLiGQOJX0RkQxS39pJhZK+iEj6a+vqoaO7j/LC3ISWq6QvIhKA+pbIPfpq6YuIZIDDT+Mq6YuIpL8ghmAAJX0RkUAcGoJB3TsiIhngQDTplynpi4ikv/rWLrLDRnFu3IZAOyYlfRGRADS0dlJemIOZJbRcJX0RkQBEnsZN7D36oKQvIhKI+tauhF/EBSV9EZFABDHuDijpi4gEoqFFSV9EJCN09vTS3Nmj7h0RkUxwoLUbSPzTuKCkLyKScPWtnUDin8YFJX0RkYQ7MtiabtkUEUl7QY2wCXFK+maWbWb3H7XvZjN7MB7liYikkoaABlsDGPFBH8wsH3gCmNVv3xTgPUDtSJcnIpJqGlq7CBmU
5GcnvOwRb+m7e7u7zwd29dv9DeCWkS5LRCQV1bd2UVaQQyiU2HF3YIhJ38zCQy3AzK4EngH+fpxjlptZtZlV19bqy4CIpLegHsyCGJK+md1iZleZ2fXAi2b27SGW8UZgGfAL4Ewz++DRB7j7be6+wN0XVFVVDfH0IiKpJaghGCC2Pv23uPtCM/s9MB1YN5QC3P1KADObCtzu7rcONUgRkXRS39rJ7LHFgZQdS/dOt5ndROQi7ElAb3xDEhFJb0G29GNJ+tcABnwKOBP4QCwndvcZR21vd/eLhhyhiEga6ent42B7NxUBPJgFMXTvuPsLwNcAzOxX7q6WvojIMNW1dOEOo0cFk/QTcSFXRESiapo7ABhdnBdI+XG/kCsiIkfUNEUGWxtdnKQtfXQhV0RkxNQ0R5N+snbvMMwLuSIi8mo1zR2YQWVREl/INbM/AucAT0Uv7IqIyDDUNHdSXpBDdjiYQY5juZD7UeDbwCLgO2Z2c9yjEhFJUzVNnVQF1J8PsV3IvdzdFwGYmQFriN7CKSIiQ1Pb3MHoUcHcuQOx9em3mdk5ZhYCFgNtcY5JRCRt1TR3BnbnDsTW0r8G+ApwMrApui0iIkPU1+fUJnvSd/ftwOXxD0VEJL01tHXR0+eBJn3NkSsikiCHH8wKsE9/wJa+mf0I8KN3A+7u6uIRERmiI0MwJGf3zucSFYSISCY4/DRuQOPuwHGSvrvvSGQgIiLprjbgIRhAffoiIglT09RBcV4WedlDnm58xCjpi4gkSND36IOSvohIwkSSfnD9+aCkLyKSMDXNHYH254OSvohIQrg7NU3Bd+/EMgwDZjYWODx1u7vvjFtEIiJpqKmjh86evsC7dwZN+mb2O6AY2EH04SwGGX/HzLKB37j7ZdHtHwOzgRrgbe7ec4Jxi4iklNpDD2YF3L0TS0t/nLsvjPWEZpYPPAHMim6fC2S5+yIzWw38A/CnYcQqIpKyDg3BEORY+hBbn/4vzexfzKwglhO6e7u7zwd2RXftB74xhPJERNJOMjyNC7G19C+NLv8pOomKu/uFsRbg7lsAzOytQB/wlyFHKSKS4mpSpXvH3V9rZiXANGCbuzcNtRAzexPwYeCyY/Xnm9lyYDnA5MmTh3p6EZGkt7exg8KcMMW5Md0/EzexzJF7BbAa+ASw2szeOZQConf+fBx4o7s3H+sYd7/N3Re4+4KqqqqhnF5EJCXsOdjOuNJ8Ih0mwYnlI+dGYKG7d5lZDvAI8IshlPEeYBzwQLSyd7j7HUOOVEQkhe1t7GB8aX7QYcSU9HuIJO0dwNjo9qDcfUZ0+SXgS8MNUEQkHew52MEp40YFHUZMSf+DwM/NrAKoBT4U35BERNJLZ08vdS2dqdHSd/engHMSEIuISFra1xi5c2dcSbC3a4LumxcRibvdB9sBmJDMLX0zu9rd7zSzz3LUXLnu/v/iHpmISJrYezDa0k/mpA88HV2uTkAcIiJpa0+0pZ8M3TvHmyP36ejyr4kLR0Qk/exp7KCiMCfQaRIPUZ++iEic7TnYnhR37kBsT+R+/KjtB+IXjohI+tnb2J4UXTsQW0v/skMr0ZE2S+IXjohI+tlzMDmexoXj373zHuBq4FQze4jIBCptwH8nJjQRkdTX1NFNS2cP40uTo6V/vAu5PwZ+bGaPDGUoZREROeLQnTvJ0tKPpXvn0v4b0UHXREQkBofv0S9JjqQfy9g7V5jZ+4Bw9PgwMC+uUYmIpIk9jYda+snRvRNLS/9q4A3As0Tmt90az4BERNLJnoPthEMW+DSJh8SS9MNEhlSujB4/Pa4RiYikkb0HOxg7Ko9wKNjJUw6JtaU/Dvhf4B7gh/EMSEQknew+2J40XTtw/Fs2D01W2wJsjq6/m6MGXxMRkYHtbezgNZNKgw7jsONdyP18dDkbKCUyANt8oB04K85xiYikvL4+Z19jB+NOTYGWvru/F8DMHgSWunuvmWUBDyYqOBGRVLavqYOu3j4mlxcEHcph
sdyymQ9cambPErlVM3k+skREktiO+jYAppQXBhzJEbFcyL0CeB3wXeCNwFVxjUhEJE3sbGgFYEpFCrX03X0n8IEExCIiklZ21LeRFbKkGWET4jSevpllm9n90fU8M/uDmT1jZj81s+S4WVVEJM52NrQxoSyfrHDyTF0yYCRm9o3ocpWZPRT9tyo64uaAzCwf2ABcHN31LmCXu58GlPXbLyKS1nY2tCXVRVw4/t07H4kuXzuUE7p7OzDfzF6M7roQ+HV0/SHgtcBfhh6qiEhq2VHfxmWnjQs6jFdIxHeOCqAxut4ElCegTBGRQDW2ddPY3p1Ud+5AYpJ+HUdm2yqJbr+CmS03s2ozq66trU1ASCIi8bUjeufOpCTr3klE0l9JZHROiHT1rDr6AHe/zd0XuPuCqqqqBIQkIhJfOxui9+gn0e2acPyxd1bx6nF2DPAhzqR1F/C26MNdzxD5EBARSWuHHsxKpQu5Q7qAe4z3z4guO4k81CUikjF21rdRWZRLYW4sAx8kTvLcPCoikkZ2NLQmXdcOxJj0zazKzCZH/y2Kd1AiIqluR30bU5KsawdiGIbBzH4InETkzps2oA9YGue4RERSVntXL3sbO5hWmVy3a0JsLf05RAZc2wKcTyTpi4jIALbXR27XnJqiSb+RI0MnXE5krlwRERnA9rpI0k/Gln4sl5X/CZgAfBS4Dnh/XCM6QX19Tq87vX2OO/Qf3s0MjMiOcMgIGWj8NxEZaS8lcUs/lqT/VuD37t4M/Huc4xm2P2/cx833Pk1bV++Q3hcyCJlFPhDMjmxzZH8oZITMyAoZ2eEQ4ZAd/tDICoUIhYxwCMKhEGGLfKBkhULkZofICYfIzwmTnx0+sjy0fvR2dFmQEyYvur8gJ4vcrEgZIpIaXqptpao4l6Iku10TYkv6ZcBPzKwb+CNwn7sfjG9YQzepPJ8rzp5MYW4W2SE7nKgB+twxA/fINwEz6HPo6XPcnT53+jzy+tHbfR45ptednl6nu9fp7eujN3qunr4+evsix/X0eeSbRp/T3dtHa1cPnd19dPT00t7VS3t3ZNnTN/S55fOyQ4c/BPKyIx8kBdlZ5OWEyY++lp+TRUVhDlXFuVQV51JZlHt4vTAnrG81Igmyvb41Kbt2ILZJVG4FbjWzAuDjwHYiE6UnlbnjS5g7vmTwA5NAd2/f4Q+Awx8GA20fa9nv2Kb2bmqaemnr6qWtq4cDbd30HuNDJT87TGVxDlVFr/5AqCrKZfSoPKZWFFBakBPA/4hIenmpro1lc0YHHcYxxXLL5hXAJURGy/wrcHq8g0p32eEQ2eEQo/KyR/zcvX3OgbYu6lo6qW2O/Ou/XtvSyfa6NtZvP0BDa9er3l9WkM3UykKmVRYyvbKQaZVFTK0sYFplIQU5yfdVVSTZNHd0U9fSybSqFG3pA+OAz0SnTZQkFw4ZlUWRlvycscc/tru3j/qWyAfE3sYOdtS3sq2ulZdqW1mztZ7fPLn7FcePHZXHtMpCTp1YwhmTyzhjSimji5NnGjiRZPBiTQuQnHfuQGzdO19LRCCSeNnhEGNL8hhbkse8Ca/uGmvr6mF7XRsv1bWyvb6VbbWtvFjbwp2Pbee2h7cBkWspZ00p57xZVSydWUlFUW6iqyGSVDbtaQJg7vhRAUdybPq+LgMqyMnilPGjOOWoX97Onl427m7iyR0HeHLnAVZtruE3T+3GDOaNL+H8WVWcN6uK0yeXkp1Ec4OKJMKW/c0U5WYxoTQ/6FCOSUlfhiw3K8yZU8o4c0oZELmOsHF3Iw+/UMvDW2r57l+3cuuqFynOzeKcGRWcP2s0582qZGJZ8o1DIjLSXtjfwozRRUl7t5ySvpywcMg4bVIpp00q5UPLZtLY3s2arXX89YVa/rq5lgc27Qdgzthi3vSa8bzlNRMYn6StIJETtaWmhQvnJO9kUEr6MuJK8rO5ZN44Lpk3Dndna20LqzfX8qe/7eXLf97MVx7YzLkzKrl8wSRe
N3cMuVnhoEMWGREHWiM3RswcXRx0KANS0pe4MjNmjC5mxuhirl06nZ31bfzqyV38esMuPvzzpygvzOHyMydy5cLJTKlIzrsdRGK1JXrnzswxRQFHMjAlfUmoyRUF3HzxLG5cNpPHttZx19qd3P7oS3z/4W1cdPJorl06nYXTypO2P1TkeLbUNAMwc4xa+iKvEAoZS2dWsXRmFfubOrjriZ38bO0O3nnbWk6dUMLNF8/itUn6RKPIQLbsb6EwJ8z4kuR9fkX300ngxozK4+aLZ/H4py7ki2+dR3NHN++9cz3vuWMdL0ZbTiKp4IX9zcwYU5zU31SV9CVp5GWHuWrhFP5y0/l8+tKTeXLnAS75+iN8/v5NNLZ1Bx2eyHG5O8/va2ZOEnftgJK+JKGcrBDXLp3O6o9dwNvPmsSPH9/OBV9dxU/XbKenVxO3SXKqbemkobWLOeOU9DGzQjO7z8weM7MvJ6JMSX0VRbn851tP5Q8fWsrsscV85r5NXPrNR3nsxbqgQxN5lef3Rroi54xNzuEXDklUS/8qYK27LwHmmtnJCSpX0sAp40fx8+sW8b13nUFbdw9X3f4E/37fRjq6hzZhjkg8/X1vZMydOWPV0gc4CBSZWRjIB149pq/IcZgZl8wbx//ddD7XLZ3GT9bs4K3feZyttS1BhyYCwN92NTKxLJ+ywuSekyJRSf+3RMbk3wo85+5bE1SupJm87DD/dukp3HH1AvY1tnPZtx7lVxt2BR2WCM/sOshpE5NufqlXSVTSvwX4rrtPBcrN7Jz+L5rZcjOrNrPq2traBIUkqezCOWNY8ZHzOHVCCR/75TPcfM/TtHb2BB2WZKj6lk52HWhn/sTkn70vUUm/GOiIrncCr3hG2d1vc/cF7r6gqip5ByqS5DK2JI+7r1vEjRfN5HdP7+aN33qUjbsbgw5LMtCz0d+7+WrpH/Zt4AYzW0OkT39lgsqVNBcOGTdeNIu7r1tEW1cPb/vu4/x5496gw5IM8+zLjZH5JCYk9507kKCk7+7b3X2Juy9293e4u267kBG1aHoFf/rwUuaOH8UNdz3Jjx/fHnRIkkGe3XWQ6ZWFFMdh3uuRpoezJG1UFOVy97WLWDZnDJ/9/Sb+a8Vz9PV50GFJmuvu7WPd9obDkwolOyV9SSv5OWG+964zuGrhZL7/123cfO/TdPXoKV6Jn+rtB2ju6OHCOWOCDiUmGmVT0k5WOMR/vGUe40vz+coDm6lt6eR77zozJb56S+pZvbmG7LBx7szKoEOJiVr6kpbMjA+8dgZfvfw0ntjWwNu/v5b9TR2Dv1FkiB7fWs/pk8soyk2NNrSSvqS1fzpzIndcfRY761t523ce11DNMqIOtnWxcU8j55xUEXQoMVPSl7R33qwq7rl+MZ09fbzj+2t5fl9T0CFJmli7rR53WDIjNbp2QElfMsS8CSXce/0issMhrrhtrR7ikhGxenMtxblZKTH8wiFK+pIxplcVcc/1iyjIyeLKH6zlmZcPBh2SpLC+Pueh52s4b1YVOVmpk0pTJ1KRETClopBfLF9ESUE277r9CTbsOBB0SJKiNu1poqa5k2Unp9Zczkr6knEmlRdwz/LFVBTl8M8/fIJ1LzUEHZKkoJXP78cMLpitpC+S9MaX5nPP9YsZU5LHe+5Yx+NbNRuXDM3K52o4Y3IZ5Uk+fv7RlPQlY40Zlcc9yxczqTyfa+5cz+OahlFitL+pg7/tbky5rh1Q0pcMV1Wcy93XLWJKeSHX/Hi95t+VmKx6vgaAZSky9EJ/SvqS8SqLcrn7uoVMKS/kfUr8EoOVz9cwoTSfWWOKBj84ySjpixAdofO6hUytKOSaO9fzwKZ9QYckSaq1s4dHttRy0cmjMbOgwxkyJX2RqIqiXO66diFzxo3i/T/bwE/X7gg6JElCDz63n47uPi6dPz7oUIZFSV+kn4qiXO5Zvohlc0bzmd9t5KsPbNaY/PIKv9qwi3EleSxIkfHzj6akL3KUvOww
333Xmbx9wURuXfUi1/6kmsb27qDDkiSwva6VR7bU8c6zJhMKpV7XDijpixxTdjjEl/5xPl9481wefqGWN9/6KJv3aYTOTHfXEzvIChnvPHtS0KEMm5K+yADMjHcvnsrPly+itauXN3/7UX61YVfQYUlAWjt7+MX6l3nd3LGMGZUXdDjDpqQvMoizppbzxw+dy2kTS/nYL5/hpnuepqWzJ+iwJMF+Wf0yzR09vG/ptKBDOSFK+iIxGD0qj7uvW8SNF83kvqd384ZvPMKGHRqzJ1P09Tl3Pr6d0yeXcsbk1LyAe0jCkr6ZfcLM1prZCjNLrcEqRIBwyLjxolncc/1i+ty5/Htr+NzvN3GgtSvo0CTOVm2uYXt9G9csSe1WPiQo6ZvZdGCuuy8CVgATE1GuSDycNbWcFR9ZyhVnT+bHa7az9Mur+PqDL9DUoTt80tXtj7zE2FF5XDJvbNChnLBEtfSXAWVm9jCwFHgpQeWKxEVxXjZffOupPHDjeSyZUcHXH9zCeV9exXdWv6j+/jTz2It1rNlWz3XnTSc7nPo94omqQRVQ6+7nEWnln9v/RTNbbmbVZlZdW1uboJBETtysMcV8/90LuP+D53L6pFK+/OfNLP6vlfzXiufY19gRdHgyAr710BbGjMrlqoWTgw5lRCQq6TcBm6Pr24AJ/V9099vcfYG7L6iqqkpQSCIj59SJJfzovWdz3weWcP6sKn7w8DbO/dJD3HzP02zao/l4U9WTOw+wdlsD1y2dTl52OOhwRkRWgsrZANwUXZ9BJPGLpJ3TJpVy65Vn8HJDG3c89hL3rH+Z3zy1m3kTRvHPi6dy6anjKMxN1J+dnKhvrtxCaUE2V5ydHq18SFBL393XAPVmth7Y7O7rElGuSFAmlRfw2cvmsuZTy/jcZafQ1dPHJ371LGd84f+4/qfV3P/MHpp14Tep3f3ETlZvruWG809Kqw/qhNXE3W9IVFkiyaKkIJurl0zjPedMZf32A/zx2T2s2LiPBzbtJytknDmljEvmjeX188YxtiR1n/JMN09sq+ezv9/I+bOquHbp9KDDGVHmnlwjCC5YsMCrq6uDDkMkbnr7nA07DrBqcw0rn9vPC/tbADhzShlvOHUc55xUwewxxccf0OuCCyLL1avjHm+meXRLHTfctYGq4lx++y9LKMnPDjqkmJjZBndfMNhx6fOdRSRFhEPG2dPKOXtaOZ+8ZA4v1rSw4m97+dPGfXzhD38HoLwwh8UnVbDkpEqWzqxkUnlBwFFnhp+v28mnf7eRmaOL+OHVZ6VMwh8KJX2RgM0YXcSHls3kQ8tmsrO+jfXbG3jsxToe21rHH5/dC8CE0nxOGT+KU8aNYuH0chb2OeEUHdo3GXV09/LfK57nzse3c/6sKm698nSK89Iv4YOSvkhSmVxRwOSKAv7xzIm4O1trW3hkSx3VOw7w/N4mVj63n2+shF9sbyAvO8zXf7SOs6aWs3BaOfMnlpKTlfoPDyWSu/O7p3fz9Qe3sKO+jfcumcq/veFkstLgIayBKOmLJCkzY8boYmaMLua90TFfWjp7eGJbPRNXFNDW1cOeg+185YHIIzC5WSFmjy1mzthiZo8dxUlVhcwcU8z4kryUnMs13g60dvGvv/0bKzbuY/aYYn72voWcO7My6LDiTklfJIUU5Wax7OQxUJYPwF9uOp+G1i7WvVRP9fYDPLeviZXP1XBv9ZFx/0cX53LmlDLOnFLG0plVnFRVmNYt2Vg8/mIdn/j1s+xv6uCW189h+XnTM+aDUUlfJMWVF+ZwybxxXDJv3OF9dS2dbKttZfO+Jp7ceZDqHQ2s2LgPeI5wyBhXkseUigImlxcwubzw8PqsMcVp20Xk7qzZWs8dj73Eg8/VMKWigF++/xxeM6k06NASSklfJA1VFuVSWZTL2dPKeffiyL69je08sqWOnfVtvHygjR31bTywaT8N/YaGzssOMXd8CbPGFDFrTDGzxxZz8thRlBWm7mjoL+xv
5rEXIxfFq3ccoDg3i4+/bjbvO3da2gytMBRK+iIZYlxJPm9f8Oq5XZs7utnZ0MZLda1s2HGATXuaWLFxHz9f9/LhYyaV5/Pa2aOZN76ECWX5jMrLpiQ/m4LcMEW5WUmVPJs7unliWwNP7jzAQ8/X8Hx0buOJZfl88a3zeNNp49P2zpxYKOmLZLjivGzmji9h7vgS3jh/PBDpCqlt7mTz/mb+vqeJ9dsbuLf6ZX7SvePY54gOUxAOGxNK85lSUcCksgImluWTn5NFaX42VcW5jB6Vy6i8bHKyQkMeprirp4+2rh7aunoPL1s6eth1oJ1ed7bWtLBuewOb9jTR2+eEDOZPLOXzb5rLxaeMYXxp/on9R6UJJX0ReRUzY/SoPEaPymPpzCquP/8kevuclxva2NfUQVN7N00dPbR39dDU0UNtcychM7p6e9l1oJ3n9zbz4HM1dPX0DVhGTlaI8oIcxpfmkRUOYUB2OBRJ7t09tHX20tbVS2tXD+1dvfT0HX/0gJysEKdPKuVfLjiJxSdVcMbksqT6BpIslPRFJCbhkDG1spCplYUxHd/X59S1dNLe3cvBtm5qmzupbemkqb2bzp4+Wjt7qG3ppKapk+7ePhxo6+ohJyvE6OI8CirCFOSEKcjJii77redmUZAd2TehLJ/scIiKohxys5TkB6OkLyJxEQpFvi0ATKkIOBg5LD3vzRIRkWNS0hcRySBK+iIiGURJX0Qkgyjpi4hkECV9EZEMoqQvIpJBlPRFRDJI0k2Mbma1wEGgsd/ukn7bx1qvBOpOsOj+5x3OMcd6bbB9R79+aLv//hOtWyz1Guy4WOp2vO2B1tO1bsn6+3is/bHULRV/Zkfvy4TfxynuXjVo6e6edP+A2wbaPtY6UD3SZQ71mGO9Nti+gep51DEnVLdY6jUSdRvqzyyd65asv4/DrVsq/sxi+Tmlat0G2x7sX7J279x/nO2B1ke6zKEec6zXBts3UD0TXa/Bjoulbsn6MxvsuFSu21Drdaz9sdQtFX9mR+9Llp/ZYMed6O/joJKue2c4zKza3RcEHUc8qG6pJ13rBapbOkjWlv5Q3RZ0AHGkuqWedK0XqG4pLy1a+iIiEpt0aemLiEgMlPRFRDKIkr6ISAZJm6T9qU0kAAAGiklEQVRvZllmNj26bmb2GTP7gpl9MOjYTsQx6jXTzP7ZzG4POrYTdYy6fST6c/tJ0LGdqGPU7Xoz+76ZfT/o2E7UMep2vpn9wswmBh3bSDGzi83sf8zsm2ZmQcczklIm6ZtZtpnd3287z8z+YGbPmNlPgV7gk2Z2A/AjoA9oAWYGE3FshlGv0UBKXH0fRt0eAVYBncFEHLth1O1p4CYgtglmAzSMujUDzwcU7pANVr9okr/c3T8KbALmBxZsHKRE0jezfGADcHG/3e8Cdrn7aUBZ9LVad/8ukYcV/uzuXwLyEh1vrIZZr38HJgDzzawywSHHbJh1W+bujwItZlaU6JhjNcy6hYDPAJ9LbLRDM8y69SU80GEaQv0ONayMFKpfLFIi6bt7u7vPB3b1230h8H/R9YeA1wJFZvZh4DXAEjP70lHvSSrDrNe33P2/gWfd/UTHd4mbYdZtr5ndCmQBrYmMdyiG+3MDCoB3mlluIuMdimHWLWW6dYZQv3vN7KvAHHf/W4LDjKusoAM4ARUcGXSoCZgNvN+jDx6YmR1aTzGD1gvA3a8NJrwTEsvP7GdBBXeCBqvbv6fo7yPE9nP7Q1DBjYBX1c/dVwIrgwspflKipT+AOiKjyxFd1vX/o0rhP7B0rReobqkqnesGx6hfgLHEXSon/ZXAP0TXLyRyATAdpGu9QHVLVelcN0j/+r1CKif9u4AJZvYs0ED6fBVL13qB6paq0rlukP71ewWNvSMikkFSuaUvIiJDpKQvIpJBlPRFRDKIkr6ISAZR0hcRySBK+iJxYGazzSzl/r7MrMrMqoKOQ+In5X4pRZKdmY0B/gMIBx3LMOQD30q34YTl
CCV9GTYzWx3n8481s3+NZxkDlHv1CZ7i08DH3b370PlG4Jwjwsw+Z2YXDPS6u+8EfgtcmbCgJKGU9CVpufs+d//PAIq+erhvjA7dW+Du20csmsS7F3h90EFIfCjpy4gwswIz+5WZPWZm347uG2dmj5jZGjP7YnTfBWb2teix/xHdt9rMPm5m68zs9/3OOdXM7uy3fadFZtZaY2aPRye/GGNmj5rZ+ujr1w0Q31Qzu8vMbjezO6L75kbf94SZ3WBmJWb2KHB69JyfjB63OFqvDWZ28bHO388sYOMg/1evi9Z1nZldFN33DjN70sweNLN7zWzqAO99WzTmJ83skui+10fr8JSZva3f+Z4xs7Vmtug4sVwffe8jZjYJDg+g1j1IPSVFKenLSFkObHT3JcA4M5sPTAL+jchgVm/qd+w7gFvc/dP99nW4+9lAsZmNP045pe6+GNgMnAGcA6wA3gJUuPsPjvPey4Db3f2a6PYE4H3AG4Fr3L3R3c8FnnL3c6OT8AB8h8hEG/9ApK/+ePKBtoFejF7c/d/ouS4Bvhl96cpofKuAe47zTeG9wPuj7z10vq8RmfjjfGBJ9LgSYCnweeA9A8QyGvhI9D1fITKz1+GXB66ipLJUHk9fksts4Jxof3EpkYS6h0jSbwX6z4T1c3ffctT7fxRd7gByjlPO0cdtJTIj1esZfFaqv7j72n7bvcB/EhlK93h/C9P6lZs/SBkvA28+zuuVQIO7HwQwsyYzKwOeBX4B1AO3Huf9nwduIVL3r0bPV+fuTdHz3RI9rgC4m8gAYgPN/DSNyFjyDxKp/+Z+r6XiRWiJgZK+jJTNwDp3/5GZvZlI8vsk8GXgGSJJ7ZCWo9/s7q/aN4Cjj3sL8D53f2oY7/0c8E4iyf8v/fa3m1kh0Bbt6thIpBXeDnz0eAW4+24zO8nMwu7ee4xDaoFyMysh8k272N0PmNk8d18aQx1eF415GnAHkdZ9pZmNitZjvZmdBtzg7rPN7FLg8gHOtQ34m7tfZGaTgQsAous1McQiKUhJX0bKD4BDfeoHgCuIzJ/6A2A30DpIt81wbQDuM7NtRKbA+6S7747xvb8BHiCS/LLMLM/dO4DbiEyb10Sk2+STwJ+IfFv5aQzn/SlwI/A/R7/g7m5mN3Fker6PRJc9ZrYeOAisdvcvDnDuvcATRFr6/+vufWb2MSKtdYBPuXu3mT0bPd8LRL4NvIq715rZr83scSAX+Hj0Vs0vRussaUhDK0tKM7PPEZnTtAvoIJL0NgUaFGBmnwC+Gf0QGezYPCLfNPqAZiKTjl9z/HfFh5mdDkx29/uCKF/iT0lfRCSD6O4dEZEMoqQvIpJBlPRFRDKIkr6ISAZR0hcRySBK+iIiGeT/A7gL+gsz1S+/AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "learner.sched.plot()\n",
    "plt.axvline(x=3e-2, color=\"red\");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "先设置学习率，只训练一轮来微调最后一层（其中包含词向量权重），从而为预训练词向量中缺失的词（missing tokens）学到更好的权重。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 555,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "46bd325a0bd7462da7b972678370b70f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch      trn_loss   val_loss   accuracy                     \n",
      "    0      4.876916   4.759636   0.1581    \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([4.75964]), 0.15810018977639395]"
      ]
     },
     "execution_count": 555,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lr=3e-2\n",
    "# lr=1e-3\n",
    "lrs = lr\n",
    "learner.fit(lrs/2, 1, wds=wd, use_clr=(32,2), cycle_len=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 556,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner.save('lm_last_ft')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 557,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner.load('lm_last_ft')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 558,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner.unfreeze()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 559,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4beeff6e523b41d79faa3e96dd443fec",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch      trn_loss   val_loss   accuracy                     \n",
      "    0      3.832584   3.737663   0.298853  \n",
      "    1      4.17896    4.02803    0.284122                     \n",
      "    2      4.162834   4.016865   0.292197                     \n",
      "    3      4.171302   4.024511   0.2974                       \n",
      "    4      4.174299   4.020073   0.300139                     \n",
      "    5      4.152954   4.005552   0.302002                     \n",
      "    6      4.139783   3.990187   0.303637                     \n",
      "    7      4.133696   3.991889   0.305091                     \n",
      "    8      4.125618   3.974964   0.306675                     \n",
      "    9      4.112508   3.959693   0.309578                     \n",
      "    10     4.085527   3.943077   0.311458                     \n",
      "    11     4.057175   3.91849    0.314173                     \n",
      "    12     4.046759   3.903127   0.316674                     \n",
      "    13     4.002753   3.877731   0.319239                     \n",
      "    14     3.987127   3.861441   0.3211                       \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([3.86144]), 0.3211004115516247]"
      ]
     },
     "execution_count": 559,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "learner.fit(lrs, 1, wds=wd, use_clr=(20,10), cycle_len=15)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "We save the trained model weights and, separately, the encoder part of the language model. The encoder will serve as the backbone of the classification model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 560,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner.save('lm1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 561,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner.save_encoder('lm1_enc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 562,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAD6CAYAAAC1W2xyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHjZJREFUeJzt3Xl8VNXdx/HPLztrCCQsihAB2QWXgGLZVRCXp7Zoa6u2da3W2gWXasWqSFvrVmt91Fpbt2r1ad2lqKDIoiIGkV0CCCgKIaxJWAJJzvNHMkOWuTPTMGRyJ9/365WXk8zJzJnD+J2T3z33XHPOISIiiSMp3h0QEZHYUrCLiCQYBbuISIJRsIuIJBgFu4hIglGwi4gkGAW7iEiCUbCLiCQYBbuISIJJiceTZmdnu9zc3Hg8tYiIby1cuHCrcy4nUru4BHtubi75+fnxeGoREd8ysw3RtFMpRkQkwSjYRUQSjIJdRCTBKNhFRBKMgl1EJMEo2EVEEoyCXUQkwfgq2AsKS7j/7VVsLS2Ld1dERJosXwX76sJSHnx3DdtK98e7KyIiTZavgt0s3j0QEWn6fBXsAQ4X7y6IiDRZvgr2wITdKddFRDz5K9hVihERichXwR6gGbuIiDefBXvVlF01dhERb74KdpViREQi81WwB6gUIyLizVfBrgm7iEhk/gr26lqMZuwiIt78Fezx7oCIiA/4KtgDtCpGRMSbr4I9sCpGpRgREW++DHYREfHmq2AP0IRdRMSbr4LdAmeeqhYjIuLJV8EeWBajWBcR8earYFeJXUQkMl8Fe4AqMSIi3nwV7BZcFqNkFxHx4q9gj3cHRER8wFfBHqBSjIiIN18Fu2lVjIhIRP4KdhVjREQiiirYzexGM5tvZtPNLC3E/dlmNtfMlprZXbHvZm0qxYiIeIsY7GbWAxjgnDsZmA50DdHsF8A0YDAwwcx6x7SXwb5U/VdnnoqIeItmxn4qkGVmc4ARwLoQbcYCM5xzlcBsYEzsuniQFjuKiEQWTbDnAEXOuZFUzdaHh2jTAdhVfbsYaF+3gZldaWb5ZpZfVFTUsN6qxC4iElE0wV4MrKq+/TlwZIg2W4HM6tuZ1d/X4px7zDmX55zLy8nJaUhfazzWIf26iEhCiybYFwJ51bd7URXudb0DjDOzJGAUMCs23astuLujijEiIp4iBrtz7kNgm5l9TNXMPcXMrq/T7EHgTGAJMM05tybmPUUX2hARiUZKNI2cc1fX+dEHde7fStWB1cahCbuIiCefnaBURbkuIuLNX8GuWoyISES+CvYArYoREfHmq2A/uAmYkl1ExIu/gr36v5qxi4h481ewq8QuIhKRr4I9QBN2ERFvPgv26jNPVYsREfHkq2BXKUZEJDJfBXuA5usiIt58FezBCbuSXUTEk7+C3bS7o4hIJP4K9nh3QETEB3wV7AFaFCMi4s1XwX7wYtbx7YeISFPmr2BXMUZEJCJfBXuAJuwiIt58FewHSzGKdhERL74KdhERicyXwa75uoiIN18Fu1bFiIhE5q9g1+WsRUQi8lewa7WjiEhEvgr2AJViRES8+SrYD17MWkREvPgr2HXmqYhIRL4K9gCVYkREvPkq2A+WYpTsIiJe/BXs8e6AiIgP+CrYA1SKERHx5qtg16oYEZHIIga7mQ0xs41mNq/6q09D2sRG9TVPNWUXEfGUEkWbLOAR59xvD7HNIdOZpyIikUVTiskCJprZAjN70SxkvEbTRkREGkE0wb4GuNU5NxToAoxqSBszu9LM8s0sv6ioqEGdDW4BpkqMiIinaIJ9PTCzxu2ODWnjnHvMOZfnnMvLycn5b/sJgP4QEBGJLJpgnwRcYGZJwEBgWQPbxIxOUBIR8RZNsD8EXAJ8BLwM7DWze8O1cc6tiGkvq6kUIyISWcRVMc65TcDoOj++Poo2MacrKImIROavE5S0qYCISES+CvYA
TdhFRLz5KtgPlmIU7SIiXnwV7CIiEpkvg13zdRERb74K9uD5SUp2ERFPPgt2rYoREYnEV8EeoDNPRUS8+SrYdeapiEhk/gp2XUFJRCQiXwV7kgWuoBTnjoiINGG+CvbAjL1SyS4i4slXwR6YsSvYRUS8+SrYkwPBXqlgFxHx4qtgPzhjj3NHRESaMF8Fu1X3VqUYERFvvgr2ZNXYRUQi8lWwqxQjIhKZr4I9sNyxQskuIuLJV8GenBQ4QUnBLiLixVfBrlKMiEhkPgv2qv+qFCMi4s1XwW5mmKkUIyISjq+CHarKMZqwi4h482Gwax27iEg4vgt2M6NCwS4i4sl3wZ5spv3YRUTC8F2wJ5l2dxQRCceHwa5SjIhIOP4L9iSVYkREwvFfsGtVjIhIWBGD3cyGmNlGM5tX/dUnRJsMM3vDzBab2TNmge26Yi/JTGeeioiEEc2MPQt4xDk3vPprVYg2FwEbnXODq9ufHstO1pSUpBOURETCiTbYJ5rZAjN70WM2PhaYUX37XWBMrDpYV5K2FBARCSuaYF8D3OqcGwp0AUaFaNMB2FV9uxhoX7eBmV1pZvlmll9UVNTQ/qoUIyISQTTBvh6YWeN2xxBttgKZ1bczq7+vxTn3mHMuzzmXl5OT89/3tJr2ihERCS+aYJ8EXGBmScBAYFmINu8A46pvjwVmxaZ79SUlqRQjIhJONMH+EHAJ8BHwMrDXzO6t0+ZZ4EgzWwJspyroDwudoCQiEl5KpAbOuU3A6Do/vr5OmzLg7Nh1y1uySjEiImH57gQl0wlKIiJh+S7Yk8y0CZiISBi+C/bkJNOMXUQkDN8Fu6nGLiISlu+CXfuxi4iE58NgVylGRCQc/wW7NgETEQnLf8Gu5Y4iImH5MNhVihERCcd3wZ6s3R1FRMLyX7AnKdhFRMLxXbCnpiRxoELBLiLixX/BnmQcqKiMdzdERJos3wV7SrJRrhm7iIgn3wV7anISByo1YxcR8eLPYFcpRkTEk++CPSVJpRgRkXB8F+xVq2I0YxcR8eK/YE8yLXcUEQnDf8GenES5ZuwiIp58F+wpyTpBSUQkHN8Fe2qycaCyEqeNwEREQvJhsCfhHNovRkTEg++CPSXZAChXsIuIhOS7YE9LruqyljyKiITmu2BPSaqasesAqohIaL4L9vTUZADKyivi3BMRkabJd8HeojrY9+5XsIuIhOK7YM9IreryvgOqsYuIhOK7YA+UYvapFCMiElLUwW5mk8xspsd955vZGjObV/2VGbsu1hYoxew7oGAXEQklJZpGZtYd+CFQ5NEkC7jNOfdsrDrmJUPBLiISVrQz9j8BN4e5Pwv4qZktMrM/HXq3vKnGLiISXsRgN7PvA4uBFWGaLQSuB/KAb5lZbiw6F4pKMSIi4UUzYz8bOBV4HjjRzH4aos1SYL5zrgLYCHSs28DMrjSzfDPLLyryquhEdrAUoxm7iEgoEYPdOfd959xw4AJgoXPuoRDN7geGm1kLoBuwOsTjPOacy3PO5eXk5DS4wxkp1evYNWMXEQkpqoOnNZnZKcApzrl7a/z4d8DjQBowxTm3I0b9qyc9WGNXsIuIhBJ1sDvn1gOnVX/7QZ37lgPDYtctb+kpSZhBmYJdRCQk352gZGZkpCSrFCMi4sF3wQ7QKj2ZPdorRkQkJF8Ge8u0FAW7iIgHnwZ7MrvLyuPdDRGRJsmXwd4qXTN2EREvvgz2lmnJ7N6vGbuISCi+DPZWaSkqxYiIePBlsLdMT2Z3mUoxIiKh+DLYW6enUKoZu4hISL4M9swWqRTvO0BlpYt3V0REmhxfBnu7lmk4ByX7NGsXEanLn8HeIhWAnXv3x7knIiJNjz+DvWV1sO85EOeeiIg0Pb4O9h17NGMXEanLl8Ge2SINgF17NWMXEanLp8FeNWMv9kGwPzN/A7sSqGT074UbeXnRxnh3Q0TC8GWwt21RdX2Qpj5j37xrH7e+sozBU94m96ZpzPpsC7k3TQsZ9NOW
bOK8Rz4I8SiH15vLNpM3dSZLN+4C4MWFG3l+wRch29704hKu/9difvnC4qgf3zmnZakijey/vjReU5CekkxGahLFTWy545otJXRolU5WqzSWf72Lsx6cV+v+S578GICnPlzPtWN7YWY88t5a/vDmZ8E2uTdNY+Hk0ygoLOX4bu2CF+8OeG3x1/TIbsXAIzMB+HDtNob17FCvL/vLK9lSso/ZBUWcd2JXVheWkt06nc6ZGcE2z8zfwK2vLAPgnIfmMSQ3i4/XV13V8Py8o/jLnLX07dyGU3pmc/try3n+4y9r9RNgyjcH0KdTG07q0YEzHpjDZ5tLABjUNZOXrj6Fy5/O571VRaz+7QSOuWU6Px7Vg1+N78uIu2fx1c69wcdb+7szeemTjZx7/JGkJteeb3z74fc5P+8ovje0WxT/CiJizjX+bCovL8/l5+cf0mPk3jSNrlktmPersTHq1aFxznH0zf+J6WP269KW6T8fEfx+34EK+t76JgDXnd6braVlPPXhBvp3acuKTcUA3P+dweRv2MFzH4Wedb9/01ie/nA96clJPPjumpj2N5wfnZLLkx+s97z/3OOO4JVPv+aWM/txxcgeABSVlDHktzODbVZMGU9GSjKPzf2cHwzrTsu00POSGSsKGXFMdr0PxYDtu/ezclMx3+iVDUBBYQnZrdNp3yqtga9OpHGY2ULnXF7Edn4OdoD1d50Viy4dkh2793P8nTMOy2PfcmY/stukcUK3LEbd817INukpSZSVVx6W56/rtnP6c8frKw7b42e2SGVk7xzuOW9Q8EOspj6d2rCqsIT+XdqSmpLE4i93MvfGMZTsK2fa0q/531lrg22funQo7VumcWzXzODP5q4u4uK/LQh+//CFJ/CTZz/hiMwMPrj51FrPtXPPfi7+2wI6tc1gTN8cLjype8g+V1Q6HpvzOVeN6oGZsWpzCVtLy4IfHCKxkvDBfvU/FrKqsIR3rxsdm04dgsCHTCjPXn4Sw3p0YP7n2/j+4x81Yq8Oj0BJJVaGHt2eBeu2x+zxQlk4+TTmrt5K+1Zp/ODvCzzbLfj1qfxxZgHj+nem0jkue6r2e/TsQV14Y8mmepOJmv/+/7ziZL731/lA6EnHP+ZvoHjfAXbs3s8tZ/X37EtFpSM5yQAo3neA215dzj3nDSIl2ZeHxSRGog12X9bYAdq3SmsyJygdkZnB17v21frZrWf358KTugXLAaf0ymb9XWdx95uf8fB7a2u17du5DZ9tLmHW9aMZc+97jdVtANb9/kzMqgLkH/M3kJps/OrFpSHb9shpRWpyEsvuGM8LH3/JnW/UnrlP+eYAfjAslwMVlSzZuJOJj3zo+bzXj+vNvW8XcPs5Azjzwbmxe0EhnDh1ZuRGwNDfvQPAPxd8GfL+N5ZsAuAXzy/im8cfSbsWqew7UPsvpUCoA3xeVMrY+2YD8OHNY/ly+14mVx/TADhjYGcmPvIhT1wyhMpKR6e2GQw8MhPnHD1/fbCsl9MmnaKSMvp1acPc1VsZcUw2V47sCVQd0A4c+1g4+TTOe/RDfvetY0Med5Hmw7cz9gdmFvCnd1ZTMHVCvYNtje3WV5bxzPwNALxw5ckM6tqOFmmh67s1a/H/vmoYVzydzye3nh4M18pKx7gH5jDimGyeeH99vd9/5rKhFO8tZ+7qoloHMwEmn9WPqdNWBr+/eUJfvjvkKI6bcrBM9MNh3Vm5qYQXfnxy8Dnruv215Tz5wXp6ZLfiznMH0jo9hcFHtavXbnVhCR1ap7Np115yO7SiVXrtecKyr6pW2pz954MHkefeOIa1RaWM7tOR8opKUpKTqKh03Pf2qnofeAFLbh/HT59bxJyCopD3J5KRvXM8X+f3hh4V/NB59ZpvcM9bq5i3Zmu9dj1yWtX7S3bnnv0U7y2nW4eWQNX7LCkp9L+/NF0JX4p59qMN3PLyMj68eSxdMlvEqGcNc+0/F/HBmq3MuXFMvXA7VC99spFJ/7eYXh1b
M3PSqHr3H6ioDJZGas6+a3LOeYZ4Y5m+dBOn9e8U9kN4d1k5JfvKOfn37wR/tvi2ccHzFp76YD23vbac3A4tWb9tj+fjLL9jPANue8vz/mV3jKegsIRtpfu54unI78P3rh/N6Eb+S+pQPXrRCTwwczVXjuxBduv0YAlq5ZQz2LO/nBOnzuT4bu247ZwBPDCzgPvOH0yH1unMWFHIFU/nM/fGMRzVvmWcX4XUlfDB/uqnX/Hz5z/ljAGdefTiE2PUs4a5+G8fUbKvnFeu+UZcnr+iep14coLMwOat3kqbjNB/JUDVbLNHdanij98dzC9fWMxzV5zE5U/lc9fEQfzP4CNwzvHVzr384O8L6JnTmhkrCunXpS1PXTqEjm0yaj3epl17Gfb7d4Pft8lI4YfDcslskUrfLm0YcUwOf5q5mj/OLAjZn9P6deIPE4+lVXoKlz31Mdt3H2Bl9SqlUJ6/8mQueGy+5/3x8p+fjahVFhtxTDZbS/fz2eZi1v2+9vGCdz8r5NIn85l9w2i6d2jF5FeWsmtvOX/+3vH1HvfxuZ8zuk9HenVsfdhfQ6JL+GBftbmE8Q/MAeK/Mqb3LdPp3bk1b1w7InJjiYnA+9bMIpYV9uwvZ+OOvfTu1MazTaCc9u+rhpGX297zOUfd8x5HZ7dido1ySai/lMIdUF855Qz6/ab+ip+a7p44iKLSMu55a1XYdgDDe2WHLMnE2vXjetOxbQYvfbKR+Z+HPuD97nWjGHvfbJ6+dCj9urRl/bbdnP9o1bGWmyf0JatlGje+uIRnLz9Jq4YaIOGDHZrGkseaNfN4f8BI43DOsXJTCf2PaBu2ze+nf8YPhnXnvVVFrNpcwm/O6c/OPQfIaZMebPfl9j2s37abgUdk0iItmUVf7KRflza0a3lwTf2e/eVMW7KJG/69pNZznNavIzNXbuH2c/qz5KtdvPTJV7F/sYdJWkoSBVMnBL8vLStn4G1vccP4Plw2/GjueH0F/1zwBQVTJ5CWUrt89+mXO0lNNgYckVn3YRNeswj2a577hCUbdzL3xvidpDRtySauee4T2rVM5dPfjItbPyTx3T+jgL6d25CXm8WD76xm8llVHxSd2qZjZnyxbQ9ds1qQlGTBSc8D3z2OvNwsuma1ZHZBEW0zUvjWw42/dUU4xx3Vjk+/3Bnyvid+NIRLnvyY8QM6cf93jqNVekrwtY0f0IlHLzqRu978jF+c2rvegoXfvLqMLcVlcS/VxlKzCPY7Xl/Ov/M3svSO8THoVcNc8sQCZq0q4qpRPblpQt+49UOkprmri9hdVsEZAzuHvL+svII+k6vKQXNuGMPIe2YF77vnvEE4Bze+uCTk7wIM7prJ4ur9haJxzZietU4ei5Ufj+rBX2Z/DlT9xfz64q85qUd7CjaXctHfDp43UjB1AvfPKOCqUT1q/TUU0BQWGESjWQT7n99ZzX0zCkL+udZY7nh9OU+8v56lt4+jTUZqXPogEgt1D8JvKd4HBuu37mHV5mJufXU5AL85uz+XDj86GIZrtpQAxmn3z/Z87Hm/GsPwP8zyvP9wM4NA1C2/Yzz/WbqJE7pn0TOnNS98/EXw3I1pPxvuWeIpLSunoLCEE7plNVa360n4E5QANu6o2kRq2de74jbYgbXmrWO8zFGksdVdVdWxbdXqoY5tMhh6dHsuGNqt1nLVwAy3V8eqg9JnDOjMm8s3k906jW/0yiarZRoje2fToVU6XbNaMuOXI+mR05p/5X/J7IIipi/bXOv53rh2OIXF++qd8RtKh1ZpbNsd/YV2as5fwy2FPevBeXRr35Ivtu+pd8zs0ic/ZsG67Tx3+Umc0iubr3bupUVqcr09hpxzvPLpV0wY2MVzv6LDLeoZu5lNAs50zp0W4r5s4GWgHTDNOXdTuMeK1Yx9+tJNXP3sJzx84QmceWyXQ368hmgKB3BFmootxfuCHwiR3DX9MwZ1zWTo0e1p3zItuLLJOUdJWTmDbn/b83d/+62B
3PLyMs/7Y2n2DaN5ffHX3Pv2weWugbPFoWpn0j37y4N/sdfcj+juiYM478SuVDgXkxMpY1qKMbPuwGtAkUewTwVKgbuBRcD5zrnQi36JXbBv2LY7uDFWPII1sCLGjHrrfEXk0GwrLePEqTPp27kNU88dyKOz13L16F5VS1MvG0rLtBSG3fUO2a3TWbOltN7vr7/rLF5etJHH565j+dfe5xUcqpohP/iodiz2OBC8Ysp4zx1JoxXrYH8FeAyY5BHsHwDXOucWmtmDwHLn3F+8Hi9WwV5zG9vld4yP+Vmfkfzk2YX8Z+lmLj65O3eeO7BRn1tEanv106/IaZ3OCd2zapVAKiodP39+EWu2lAYDOB76dGrDyN7ZYTd/iyTaYI/4t4GZfR9YDITbq7UDEDhEXgzUO8PDzK40s3wzyy8qis2eHxmpyXSsXhP80KzG21s84D9Lq2qENS9eISLx8c3jjuSUXvX34U9OMh76/gm1rm3w9i9HctxR7bj7vEFcNvxo/v6jPN69rv6WHXVNOr13gy/4sqqwhL/OXRfcQ+lwijhjN7PngG5UHWjtA9zqnHuoTpv3gZ9Vz9j/DCxxzv3V6zFjNWMHeH3x11z7z0VA45djVF8X8ZfKShdVvXtbaRlmxl3TV/J/+VXX+A38f15RWXv3TS9PXzo05DbRZx7bmYcvbNja+pgvdzSzXOBxj1LMFGAv8AfgU+DbzjnPKXQsgx2qAnbiCV1JTTYmntiVIR6nhMdSzVPGFewizVOorSMC20FcM6YXby/fzCOz17Loi4N19+G9svnH5Sc16PkO23JHMzsFOMU5d2+NHz9I1aqYC4HXw4X64fLiJ1WfqrNWbeGjX9f77ImpLcUH916/8Yw+h/W5RKTpuuvbx3LTS0tZOeUMtpaWkZGaXGvLiHEDOjNuQNVJYqsLSzj9j3M4rV/Hw94vX5+gFFD3U/NwzqD3l1fSe/LBKwhpti4i0dq8a19wC4iGiNnBUz/4yeietb5fs6WUtUX1lz8dCucccwqKaoV6qC1KRUS8dM7MaJStCxLidMnrx/WhsLiMt5ZvprSsPHhq8+Sz+nH5iB4NftyZKwrZuGMPp/brxIi7658OfXy30PuFi4jEU0IEe1KScd93BjPvd0WUlpUHfz512kp27NlP/vodfLRuO+9cN4qeOdFv9n959dV1bn899ErPzlGeYSci0pgSohQT8MhF9ZcQ/e+stXy0ruqiAKfeN5vcm6axpXgfO0LsM7Fzz36eeH8dZeUVbA+zD8X4AZ24eUJfXTFeRJqkhDh4WtOFj8/n/TXbompb88BnNGtT7zx3IBkpSZyfd9Qh9VFEpCGaxe6OoTx7+cnAwYtAh1NR6Sgrr8C58Du+BVx8cveY9FFE5HBKuGAP+PYJXclITWbGikJeXvQVk8/qx9RpK2u1iebssaFHt2fBuu3866phh6urIiIxlXClmHBK9h3g2DBbgQZcNaonF57UjY5t00lPic9+yiIidTXbUkw4bTJSefHqYdzy8rKQu7zNnDSKXh2jXzUjItIUNatgBzixe3ve/MVI4OAZqwOPbMuvz+ynUBeRhNDsgr2mmZNG8tbyQn4yuqcvLmQrIhKNZh3svTq2CV6vUUQkUegMGxGRBKNgFxFJMAp2EZEEo2AXEUkwCnYRkQSjYBcRSTAKdhGRBKNgFxFJMHHZBMzMioANDfz1bGBrDLuTSDQ2oWlcQtO4eGuqY9PdOZcTqVFcgv1QmFl+NLubNUcam9A0LqFpXLz5fWxUihERSTAKdhGRBOPHYH8s3h1owjQ2oWlcQtO4ePP12Piuxi4iIuH5ccYuIiJh+CbYzSzDzN4ws8Vm9ow1oytjmNkQM9toZvOqvwbXHYtQ45PoY2ZmqWb2evXtqF5/cxinOuNS973TpxmPy1NmNt/MXjOz1on8fvFNsAMXARudc4OBLOD0OPenMWUBjzjnhjvnhgNDqD8WocYnYcfMzFoACzn4mqJ9/Qk9TiHGpdZ7
xzm3iuY5LsOBFOfcyUBb4FIS+P3ip2AfC8yovv0uMCaOfWlsWcBEM1tgZi8Cp1J/LEKNT8KOmXNur3NuELCx+kfRvv6EHqcQ41LrvVM9u2x24wIUAn+qvp0E3E4Cv1/8FOwdgF3Vt4uB9nHsS2NbA9zqnBsKdAG+Tf2xCDU+zWnMon39zW2c6r53RtEMx8U5t9o5t8DMvgVUAotI4PeLn655uhXIrL6dSdM83fdwWQ8sq3H7eOqPResof5aoQr0/oh2TRB6n9dR+73Tk0MbKt8zsf4CfAecAj5LA7xc/zdjfAcZV3x4LzIpjXxrbJOACM0sCBgLXUX8sQo1PcxqzaF9/cxunuu+dZTTDcTGzzsANwNnOuRIS/P3ip2B/FjjSzJYA26ka3ObiIeAS4CPgZeBv1B+LUOPTnMYs2tff3Map1nvHObeC5jkuP6SqFPWWmc0DUkng94tOUBIRSTB+mrGLiEgUFOwiIglGwS4ikmAU7CIiCUbBLiKSYBTsIiIJRsEuIpJg/h8Vcv+er95TMAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "learner.sched.plot_loss()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "## Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 563,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "trn_cla = np.array([[stoi[t] for t in e.split()] for e in train_B1_df['word']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 564,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "trn_label = train_B1_df['c_numerical'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 565,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split \n",
    "trn_clas, val_clas, trn_labels, val_labels = train_test_split(trn_cla, trn_label, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 566,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bptt,em_sz,nh,nl = 70,300,1150,3\n",
    "vs = len(itos)\n",
    "opt_fn = partial(optim.Adam, betas=(0.8, 0.99))\n",
    "bs = 48"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 567,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "min_lbl = trn_labels.min()\n",
    "trn_labels -= min_lbl\n",
    "val_labels -= min_lbl\n",
    "c=int(trn_labels.max())+1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 568,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "trn_ds = TextDataset(trn_clas, trn_labels)\n",
    "val_ds = TextDataset(val_clas, val_labels)\n",
    "trn_samp = SortishSampler(trn_clas, key=lambda x: len(trn_clas[x]), bs=bs//2)\n",
    "val_samp = SortSampler(val_clas, key=lambda x: len(val_clas[x]))\n",
    "trn_dl = DataLoader(trn_ds, bs//2, transpose=True, num_workers=1, pad_idx=1, sampler=trn_samp)\n",
    "val_dl = DataLoader(val_ds, bs, transpose=True, num_workers=1, pad_idx=1, sampler=val_samp)\n",
    "md = ModelData(PATH, trn_dl, val_dl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 569,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# part 1\n",
    "dps = np.array([0.4, 0.5, 0.05, 0.3, 0.1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 494,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "dps = np.array([0.4,0.5,0.05,0.3,0.4])*0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 570,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "m = get_rnn_classifier(bptt, 20*70, c, vs, emb_sz=em_sz, n_hid=nh, n_layers=nl, pad_token=1,\n",
    "          layers=[em_sz*3, 50, c], drops=[dps[4], 0.1],\n",
    "          dropouti=dps[0], wdrop=dps[1], dropoute=dps[2], dropouth=dps[3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 571,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "opt_fn = partial(optim.Adam, betas=(0.7, 0.99))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 572,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn = RNN_Learner(md, TextModel(to_gpu(m)), opt_fn=opt_fn)\n",
    "learn.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)\n",
    "learn.clip=.25\n",
    "learn.metrics = [accuracy]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 526,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "lr=3e-2\n",
    "lrm = 2.6\n",
    "lrs = np.array([lr/(lrm**4), lr/(lrm**3), lr/(lrm**2), lr/lrm, lr])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 573,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "lrs=np.array([1e-4,1e-4,1e-4,1e-3,1e-2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 574,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "wd = 1e-7\n",
    "wd = 0\n",
    "learner.load_encoder('lm1_enc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 575,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn.freeze_to(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 577,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# learn.lr_find(lrs/1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 578,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# learn.sched.plot()\n",
    "# plt.axvline(x=3e-2, color=\"red\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 576,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "442db7306bc740cf83314a8a4049d624",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch      trn_loss   val_loss   accuracy                   \n",
      "    0      2.77859    2.75312    0.15875   \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([2.75312]), 0.15875000009685755]"
      ]
     },
     "execution_count": 576,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "learn.fit(lrs, 1, wds=wd, cycle_len=1, use_clr=(8,3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 531,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00066 0.00171 0.00444 0.01154 0.03   ]\n"
     ]
    }
   ],
   "source": [
    "print(lrs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 579,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn.save('clas_0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 580,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn.load('clas_0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 581,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn.freeze_to(-2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 582,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "10c21a3f5f6a421bae9514faf2c8f4d6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch      trn_loss   val_loss   accuracy                   \n",
      "    0      2.744222   2.738882   0.15875   \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([2.73888]), 0.15875000009685755]"
      ]
     },
     "execution_count": 582,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "learn.fit(lrs, 1, wds=wd, cycle_len=1, use_clr=(8,3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 583,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn.save('clas_1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 584,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn.load('clas_1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 585,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn.unfreeze()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 586,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2337e6f09c144be396c86bfe03252180",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Epoch', max=14), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch      trn_loss   val_loss   accuracy                   \n",
      "    0      2.558027   2.495492   0.23075   \n",
      "    1      2.391883   2.469435   0.2465                     \n",
      "    2      2.251277   2.736789   0.2985                     \n",
      "    3      2.154949   4.104603   0.339                      \n",
      "    4      2.084121   4.322234   0.35425                    \n",
      "    5      2.00256    1.975354   0.373                      \n",
      "    6      1.934767   2.390207   0.40875                    \n",
      "    7      1.945743   2.734143   0.4125                     \n",
      "    8      1.813279   3.075174   0.43725                    \n",
      "    9      1.784126   3.330584   0.4565                     \n",
      "    10     1.736501   1.763323   0.4655                     \n",
      "    11     1.737804   2.636687   0.47025                    \n",
      "    12     1.700827   5.038342   0.47225                    \n",
      "    13     1.623694   5.834108   0.472                      \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([5.83411]), 0.47199999922513963]"
      ]
     },
     "execution_count": 586,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "learn.fit(lrs, 1, wds=wd, cycle_len=14, use_clr=(32,10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 587,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAD6CAYAAAC1W2xyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xl4VNX5B/Dvmz0hIQQI+xIg7JtIZAfZBBGwSlvrvqBSqS22ViyoVVwoSKmKP7fiUsENaxVREdmRxbAEZA07RPYQAiRk387vj1nv3DtbmGQyk+/nefp475kzM2duwztnzj3nPaKUAhERBY8QfzeAiIh8i4GdiCjIMLATEQUZBnYioiDDwE5EFGQY2ImIggwDOxFRkGFgJyIKMgzsRERBJswfb9qwYUOVlJTkj7cmIgpY27dvv6CUSnRXzy+BPSkpCWlpaf54ayKigCUiv3hSj0MxRERBhoGdiCjIMLATEQUZBnYioiDDwE5EFGQY2ImIggwDOxFRkAm4wK6UwhdpJ1FYUu7vphAR1UgBF9if/novpv5vN6Ys+tnfTSEiqpECKrCfyynCp1tOAABiI/2yaJaIqMYLqMB++nKh9ZiBnYjIWEAF9hCxHV8qKPFfQ4iIarAAC+y2yJ5XXObHlhAR1VweBXYRWSAim0XkGxHRjYGISB0RWSIim0Rkju+baWIf2ItLK6rqbYiIAprbwC4igwCEKaX6AagLYJRBtbsAbFZKDQTQVUQ6+7aZlrbYjlOPZeNCXjGUUlXxVkREAcuTHnsmgHlu6l8GECsioQCiAVTJALhjDE95aRXmLD9YFW9FRBSw3AZ2pdRhpdRWEbkVQAWAFQbVFgO4EcBRAPuVUkcdK4jIJBFJE5G0rKysSjW23KB3/tlW0/THOT8cwKsrD1XqdYmIgomnY+w3A5gCYLxSyuiu5XQAbyulkgDUF5EBjhWUUvOVUilKqZTERLc7Oxkqr9AH9rJyhYoKhbfWHcW81Ycr9bpERMHEkzH2JgCmAhinlLripFocgCLzcTGAWN80T6vCyXj6kl2nrcdJ05Zi6D/XVsXbExEFBE967PcBaApguYhsFJEHRWSuQ503AUwWkVSYxthX+7idAGw99g8fuE5TfvJioeY8I7sAxy/kV0UTiIhqPLfLN5VSLwN42U2dDAADfdQmpyrMgT0qPNRalldchlcMxtZ/OnoBbRrWqeomERHVOAG1QGnV/vMAgFOXCt3UBJ5evBdnLruvR0QUbAIqsOebV5uWllfgyMwxmDI8WfN447qRmvMTFwuqrW1ERDVFQAV2e2GhIUhyGGrZ8tRIHJ91k/U8n2kHiKgWCqjAPrZHUwDAdUkJAICD5/STdEQEG54cBgAciiGiWimgAvuQDonImD0WyY3iAADjezYDADSNj8Kqx6+31msQGwEA+PuSfdXfSCIiPwvopObdmsdjw5PD0KxeNELtcvrGRNg+1vkrRWgUF+WP5hER+UVA9diNtKwfownqjt5aq8tuQEQU1AI+sDvTukEMAODDnzJwONPZglkiouATtIF93u29rMc3vLrejy0hIqpeQRvYr2lZz99NICLyi6AN7EREtVVQB/ZrW9l67aXl3EqPiGqHoA7sX062pYVv//QyFJaUI4NZH4koyAV1YBfRToN8bNHPGDp3HUrK2HsnouAV1IEdAJrXi7Yer0jPBAB0eGaZv5pDRFTlgj6wf/unQf5uAhFRtQr6wF6/TgRGdGqkK88pKPVDa4iIql7QB3YAGNy+oa7sk62/+KElRERVr1YE9vsGJOnK5vxwsPobQkRUDWpFYBcRbJo2HF2b1cX9BkGeiCiYBHTaXm80rxeNpVMGAzAlBiMiCla1osfuyEWWXyKigFcrA3uFMv233HJARBREPArsIrJARDaLyDciYjh8IyJPmussE5EI3zazamw5lu3vJhAR+ZzbwC4igwCEKaX6AagLYJRB
nbYAuprrLAPQwtcNrQq//3i7v5tARORznvTYMwHMc1N/BIAEEVkPYDCA4z5oW5XZ/sxIADBcuEREFOjcBnal1GGl1FYRuRVABYAVBtUSAWQppYbA1FvXreMXkUkikiYiaVlZWVfb7qvSIDYSAPD1zjN+bQcRUVXwdIz9ZgBTAIxXSpUZVMkFYFnxcwxAc8cKSqn5SqkUpVRKYmJiZdvrc0fO56GMudqJKIh4MsbeBMBUAOOUUs52hd4OIMV8nAxTcA8II1/5EclPL2OediIKGp702O8D0BTAchHZKCIPishc+wpKqVQA2SKyDcBBpdTWKmhrlVp78Ly/m0BE5BOiVPXP5U5JSVFpaWnV/r721hzIxMQPtW3ImD1Wc15eoRDK1UxEVEOIyHalVIq7erVygRIADO/UWFd28mIBikrLAQAzl6aj3VPfY9ORC9XdNCKiq1JrA7uRwXPWotPffwAAvLvBNGPzrve2+LNJREReq9WBPWP2WN3wC8BUA0QU2Gp1YHcmp1C7u5I/7kMQEVUWA7uBXScva87zio2m7hMR1UwM7ACmDE/WnO84cUlzfuCcs+n7wJKdp3HgXG6VtIuIqDIY2AHERpkSVt7TrzUA/UYcv30n1elzH1u0Eze+tqHK2kZE5K1as4OSKw8MbIPo8FCM6NwYH23+BVeKPBt6sUyNJCKqSdhjBxAeGoJ7+iehoTk5mMWsCd1dPo9z3ImoJmKP3U5EmPZ77o4+rbB6fybOXC7S1S2vUHhwgX9XzxIRGWGP3Y26UeHILdJPf/z9R9ykg4hqJgZ2Bwsm9gEAvHuvKR1DbFQYTl0q1NS5VFCKVfszreeOQzhERP7EwO7g+g6JyJg9Fjd0MeWS+WzrCQBAToGp134hrxj/TTvp8CyFpbvP6nr2RET+wMDuxgMD2wCANWjf+e5mzF52wPr4wOQGuJBXgkc/3YEeM4w2lyIiql4M7G5c26oeAFinQB7KzNM83r5RnOb8UKbzxUxERNWBgd2NuKhwADAcZpk1oTtOXSrQlH2+zXGYhoioenG6oxuWjTZmLzuAnQ45ZEZ0aoQhHRKxav8aa9m2jIvV2j4iIkfssbvRuG4UAOC4wZ6ocVHhaF4vWlO2+1QOjmXl6eoSEVUXBnY32jSsA0CfyhcAosJNl+/nv9+Agy/daC1/Y+2R6mkcEZEBBvarIGIapkmoE4HIsFBr+Vc7TqP909/7q1lEVMsxsHvpxV91xfBOjRARqr90nz7U13pcWq506X+JiKoDb556qVWDOvjg/iTDxwpKtNkeMy7k49pWCdXQKiIiG/bYPbBoUj/rcVSY80s2onOj6mgOEZFLHgV2EVkgIptF5BsRcdrLF5HHRWSV75pXM/Rr28B6XOgiB7tlzN0iJiLUSU0ioqrjNrCLyCAAYUqpfgDqAhjlpF5rAPf5tnk1x5AOiQCAzk3ruqxXLybcevzIxztwIa/Yev6fTcfR64UVuGneBhSXcZMOIqoanvTYMwHM86D+PADTr7pFNdTCiX2QMXusdV67Mx9N7Isnb+xoPU95aRWSpi3F+StFeP7bdFwqKEX62Vx8teN0VTeZiGopt4FdKXVYKbVVRG4FUAFAl+lKRO4EsAtAurPXEZFJIpImImlZWVlX0+YarXuLeNzXP0lXfsf8zZrz6V/tqaYWEVFt4+kY+80ApgAYr5Qy2hB0HIARABYB6C0if3SsoJSar5RKUUqlJCYmXk2ba7w6kfrbEEez9CtX7eUVl2HMvA34aPMvKC2vqKqmEVEt4MkYexMAUwGMU0oZpi5USt2plBoE4HYA25VSb/i2mYFn7/Oj3dYpKi23Jhf7+cQl7D+bi79/vRdvrOHKVSKqPE967PcBaApguYhsFJEHRWRuFbcr4MUa9NodDZi9Bj1mrEBecRlC7GbUrD5g2p1pzYFMFJbwJisRecdt9FFKvQzgZU9eTCmVAWDkVbYpqH33p0H4asdpfLDpOC7mlwAA5q06hHc3HLfW2Xs6
F2+vO4qXfzBt6HFHn1aYNaG7X9pLRIGHC5SqUIh2Wjsev6EDujWPR1io9gGjmTaWoA7YtucjIvIEUwpUoWOzxgIAsvOK0cBuw2vHm6O5Rdr70T1bxGPXqRxNWXFZuSbRGBGRM+yxVwP7oA4AAm2Pffsv2s05isv0s2LyiowmIxER6TGw+8H9A5I055uOZGvOD5zTTz7q/dIqw+35iIgcMbD7QasGMR7Vm9CrueZ83qrDVdEcIgoyDOx+svf50Zg2ppOmbHD7hppzxx76+xuPg4jIHQZ2P4mNDMMj17fTlE0Z0R7Pje9iPV+1/zxm3tqtuptGRAGOgd3PFv9hAB4a1AZTR3dE71YJeGBgG83j47o3sx53aBxb3c0jogDE6Y5+1qtVAno52WXptpQWiI8Jx9COiVh3MAuHMvOquXVEFIgY2GugXc+NQurRbIzq0hgA8OEDfdD7xZXINq9UJSJyhUMxNVB8dDhu7NYEIXZLV2/q3lSziQcRkTMM7AGioKQclwvcz2Of88MBbDpyoRpaREQ1FQN7gNhz+jIA4MdDWXh99WFMWphmTSJmkVNYirfWHcVd723xRxOJqIbgGHuAaBIfjUOZebjvg63WslX7V1rz0QDAhsO2nalOXSpAiwTPFkIRUXBhjz1ArD+k306wQmnPl+w8Yz2etHB7VTeJiGooBvYA8e0fB7mtszI903pcoZSLmkQUzDgUEyC6t4h3+lhRaTm+3HFKU5ZfUobyCoVQx6TwRBT02GMPIH2S6uvKKioU7n5vC55evFdTfvJiIV5bdai6mkZENQgDewDp1DROV3bo/BWk/XLJsP7/cVNsolqJgT2AGA2q3PjaBs351NEdq6cxRFRjMbAHkMdH2YJ2Dydj7kM7Jjp9/vkrRahwnEpDREGHN08DSHx0OA7PHIMrRWW4VFCCEf/6UVenfp0ITBzYBh9s0uZuv5BXjD4zVwMAMmaP1T2PiIKHRz12EVkgIptF5BsRMfwy8KQOXb3w0BDUrxOB+jERho83iovCs3Y53QFAKYUPuEkHUa3hNrCLyCAAYUqpfgDqAhhVmTrkW9ERoYbllumNv7++LSLDTP/3Ltl5Bm+tO1ptbSMi//Kkx54JYJ6b+p7UIR+yBG1nosJCUVxWgYoKhT9/vlPzWGFJOQBTbplLTAVMFHTcDpkopQ4DgIjcCqACwIrK1BGRSQAmAUCrVq2uqtEEiOjnyHz8YF/rsaVH3/ap73X1DmZeQcuEaPR+aRUA4NBLYxDh5ouCiAKHp2PsNwOYAmC8UqqsMnWUUvOVUilKqZTEROczN8hzW58eAfuFpYPsNsOODjceqgGAW97cZA3qAJCdX1wl7SMi//BkjL0JgKkAximlrlS2Dvleo7gobHlqJADgy8n9NY9FhXveA193MAul5RU+bRsR+Y8n//rvA9AUwHIR2SgiD4rIXDd1Jvq6oWQsMS4SGbPHondrbboBMVzOZGz6V3tw42vrNWUVFQpbj1/0SRuJqHq5DexKqZeVUslKqUHm/72vlHrCTZ0Pqq7J5In0s7le1T+alY9zOUXW8/c3Hsdt/041TBfsTFFpOT7dcgKKmSWJ/Ip3zIJUSlKC18/pN2u19Xjnycua/3rilZWH8NTiPVi+75zX701EvsPAHqTGdm+qK4sIC8F796bgmpb1AADdmtfV1XllxUEAQG6RaX/Vjzb/4vF7zl9/DACQV1zudXuJyHcY2IOU0XTInc/egJFdGiMhJhwAMGV4e9zQpbGmzuvmjJC9W5t6/N2a6YO/O2HMAU/kVwzsQWzXc6Nw/4AkAMArt/VETIRp2cJz47tiVJfGGNw+0TAbpFIK4aGmP421B7OsC5pcyS+2zXCNiQjFwtQMJE1bihUuhmX2nMpBTkGpF5+IiDzBwB7E4qPD8ey4Ltj+zEhMuLaFtTypYR3MvzcF0RGh6NA4Dhmzx+KTh2yLm1amZ2rGyTNzi+DO3e9vsR4XlJTj2SX7AAAzvtlnWF8phfFv
bMRd72/2+nMRkWsM7EEuJETQIDbSbb2BybbFTbN/OIDdp3Ks50PnrnP7/J9P2G6yXi6wpSm4u39rw/rFZaZ583tPezd7h4jcY2AnnWNZ+bqypxfv0ZWdzSnE9K9266Y3zvg23XocFWa8AvZsjvtfAURUOQzsZOVq96VPtpwAAKQezUbStKU4ebEA/WetwWdbT2L94QtOn/fCd+kY/38bdeXbMrj4iaiqMLCT1eTr22nOm9eL1pzvPHkZd7xrGhMfPGettbywxDB9kNWe0zkoKtXegG0Wb3rteuYZOkTkOwzsZBXiME3RsQe/Mt14hktkWCj6t22Ari6mRjoO7xSaA31YCP8EiXyN/6pIY+LANtbjm7o31Wyj9+Za4806CkvLkXosG8cv6MfmLWZ+n645twT2C3nMLEnka9zCjjSmjemEhnEReGBAG49ztP/hkx0ATNMcb+zaBD+Yp0q2bhCDX7ILAAAt6sVonlNkNze+uKwckU5ushKR99hjJ42IsBD8YWiyZuu9JnWjPH7+PebpjZFhIVj1+PXW8g5N4jT1ispsgT2vyPUYPRF5h4Gd3Fr86ACP63ZrHo+6UWFYOLEPwkND8Pdxpo21l+w8ralnfzM1n7lliHyKgZ3cahofbVgeG6kdyZvz6x6Ijw7H7hmj0bdtAwDAg4NMY/b2C54AoLjUtrHHkH+uRdK0pdhvTjWcfiYX764/hjJu/kFUKRxjJ69teHIYzuUWoWOTOLyz7ijeWme6qWqQd8wqqYHDGHuZvpe++OfTEAFuen0DAODYhXzMmtDddw0nqiXYYyePfDnZNhzTsn4Mrkuqj7pR4agbbZuHXuFkg43hnRohI7sAC1MzrGWHMvN09UrKKvDysgPW88+2ctMOospgYCePXNuqHqYMT8aKvwzRlMdF2X70/dou0Zi9sgpTcLYkBssvLsPK9ExdPRHbNEiLb3ad8biNY1/fgDvmM6kYEQM7eURE8PiojujQWDu7JdRu/CUs1PjP6dSlAuvxt7vOIOuK8dz1w5l5aJmgHbJ5deUhj9u470wuUo9le1yfKFhxjJ2uimWgZGByA6d1GtSJsK48/dNnP2se2z1jFHrMWAEA2Hjkgm5q5TknKYNzCkoBMaUmJiIt9tjpqliyEDibOQMAUeHOFx/VjQrHsX/cZD13DORFpcYzY3q+sAI9n19hPb/3g62eNBepR7O92qCbKBAxsNNVGd21CTo3rYvJQ9s5rfP7IcaPPTO2MwB9jhoAOPjSjR69f3mFwr9/POpxsL7j3c0efwkQBSoGdroq9WIisOyxwWiXGOu0zqD2DbF7xihdeQu78fQYu5WuItCkGKio0M6Msd+G7+TFAsyym0lDRB4GdhFZICKbReQbEdGNy4tIlIh8JyK7ROQjMdpJmWq1ulHh2PDkME1ZZLjtz+9fv+1pPXac4bjmwHnr8fpDWej63HLreXa+/kassymSjl8QRMHK7c1TERkEIEwp1U9E1gEYBeB7h2p3AzillBonIt8BuAHAChDZaVlfO+MlpXWC9Tg2Sv+nOLprYyzfl4l/Lj+I9zYew+Zj+s051h/Sb/Kx48QlrN5/HhnZ+YiPDsesCT0AAPvPcRs+qh08mRWTCWCe+dhZD384gC/Nx2sADAMDO7kQFxmGuCjbjBb7YwvLjJeDmVecvs681Yd1ZX/5fBdOXLRNsbQE9rV2Pf//bT+F3/Q2nndPFOjcDsUopQ4rpbaKyK0AKmAcsBsAsCQDyQVQ37GCiEwSkTQRScvK4qyE2s4x22OcQY/dk024h3dqpCuzH4O31615vPX4iS92uX3tyth/Nhe3z0+tktcm8pSnY+w3A5gCYLxSyuhfzQUAln818eZzDaXUfKVUilIqJTExsbLtpQDXIsE0LXJ8j6aacqPAXtegF+/Ifvx9aEfT39UVJ4G9pKzqk4qNmbcBm49dxEYX+8ASVTW3gV1EmgCYCmCcUsrZb+LVMI29A6ZhmbVO6lEt987dvQEAN3Rt
oimPtpvrPvPWbgCAbs2db7XnaOPfhmHS4LYA9AF8w2HTL8Sle85qyl3t+HS15q32fMUska950mO/D0BTAMtFZKOIPCgicx3qfAKguYjsBnARpkBPpNOteTwyZo/VbZQdYp5IldQgBnf1NW3WMbi98192wzpqH2sYG6mZZWPvsUU7UVBSpktl8OySvV6331PbMi5V2WsTueP25qlS6mUAL7upUwxgnK8aRbVPncgwzPl1Dwxs31BTnv7CaHR5drmmrG5UGP59Two6PLPMWhYVHup0e72L+SW61wCADQbDJWkZF9EuMRYJdSIq8zGIagTmiqEa47brWurKYiLC8NrvrkHHJnEYM8+Up72orMJwP1b7ZGPeGjZ3nWZoxn4T78rok2SaP6CUwvJ9mbihS2OEGqywJaoKXHlKNd4tvZqjc9O6aBZvShDmOIY+aYhpbD25UZzuuUYWTuyjOb9cUOKz8XbLhiJbM0xz7r/dfRaPfLwdY+at98nrE3mCgZ0CxofmgDxtTCcAwP0DkgDY5rsnN7KlNXAcw7c3pINtfL68QuGGV/VBd8nO00iathQHzzmfQ++ovEIhI1v7q2GKOZvlocw8vLfhmMevRXQ1GNgpYHRoHIedz96A35t76M+N74LXfncNHjbPhrH33Z8G6VIYAMDU0R0BAI3iIhEXGYaHFmwzzA//2KKdAIDRr+mDfm5RKU5e1A/75BXZpll2bKz/9fDS0v2GzyPyNY6xU0CpF2O7qSkiuKVXc8N60RGhuhugjw5rh0eHJQMAzpuD+dqD3i+Ws+SPdxyHn/2DLRlZXnEZLuaX6J6bW1Tq9fsReYuBnYLKj1OHorC03DAHfJ3Iqv1z/2zrCevx6cuFuPbFlbo6xdWwSIqIQzEUVFo3qINOTWwLm+zTAVvmxwPAbz3ME9OzRbzTxzYdMV5dervB7B4LjrNTdWCPnYJaQkwECkoKAWi30fN0b9RyJymAAeCu97YAAGaM76JJYmZJm2Dk+z3nkHo0G/3bOd9KkOhqscdOQe258V0QERaCLyf315S/fkcvzfmz47oYPr+gpNzte8z4Nh2v2G267W7I5+ufTzt9rKSsAjmFpej7j1VYZDe0Q+QNBnYKaqO6NsGhl8agd2ttwtFrWyVozicOamP4/ILicixMzUCek8RiFr1a1bMelzts6PGQw2s7rq61N2D2GvR8fgUyc4sxZ/lBl+9J5AwDO9V6o7o01pzH2vW4z+UW4dkl+/DCt/sAGO/OlBATju92mxKM3dGnFVamZ2oef+qmznjv3hTreUSo8392F/JsUy8v5pcgadpSzGWAJy8xsFOt9afhyWgaH4X5dkEXAL7540AMcBgDt8x1N5rCeKnANoXxxV91Reem2qyUISGCkV0aY/VfrwdgSn0w+tX11jnte0/nYMy8DUiattSwnW+sPeLlJ6PajoGdaq2/juqI1OkjrOcv/KorZk/ojraJsfj04X7o3TpB9xzLzdQXb+lm+JphoSF4Zmxn6/kH99u+NCxTMF9auh8HM69Y571P/mQ79p/ltn3kOwzsRGb39k/C7X1aWc//aF7MBABHsvJwLqcIfWaaM1K7mC0TZjfUMryTbZgnzCEJWGyEacjn5MXCq2o3kSMGdiIn7Hd1OnmxEEt22mazfLPrDF65radXr2c/3RIAPk87iXfXc147+R4DO5ETjml2j5zPsx6XVShMuLYFtjw1wvFpAIBW9WMwsrN2P1aj1bAzv9/vdbtOXy5ERYXxL4bM3CKcucxfALUdFygROdG9uXbV6RfbT1mPp44yJROzn+HyuxTbitP1BgnIroZSCiKCU5cKMOjltZgyoj0ev6GDrl7ff5iGijJmj8Xj/92JhrGReOqmzrp6FNzYYydyIszFtETLylH7DT98EUAPvnSjYfm2jEvY/stF69THHw9pk5edvlyINHMOeIuvdpzGfA711EoM7EQuvPa7awzLxbxHq30umphI46357NnPkjFiv73f8j8PsR6f
zSnEr99Oxdc7z5gKHG7eDvvnOvzmnVTr+cLUDLdtoeDFwE7kwi29mutuev40bbj1WETQp41pVWu4ix6+RbmT5I6PDmuHTx/uqylrbd6NCbDlh7dQADYfy8aQOWuRX1yGEocXfnbJPt17XDIvePrPpuNu20mBjWPsRG7kFNoWIMVGhqGZw+5M/7n/OmTn6RcuGenUxHj7vklD2um+QCIN9nW1KCtXuH3+ZgDAF2knPXpvy/DN89+m4/lv06355JVSOJtTpPtcFLjYYyfyglHOmDqRYWhl17t2pWX9GByeOQbHZ92kKbcP6q/f0QtjujWBiCAq3PifaLrdgqYZ36a7fE9LGoS31x3VlF8xb/rx4U8ZGDB7DfaeztHluaHA5FFgF5FwEfnWxeN1RGSJiGwSkTm+ax5R8AkPDYGIYN7t12D2hO6Y8+semsdv7tkMb9/dGwAMZ75464r5y+hgpnb/1u7mnaB+OmpKYTzu/zbioQXbsGTnaQyZs9bplEoAmLk03WkKBPI/t4FdRKIBbAdwg4tqdwHYrJQaCKCriHB+FQWNIzPHuEzcVVm/uqY5bu/TCre52Jhj0pB2Xr/uvf1ba84v57veju9olm1+/tqDWXjqqz04cbEAV4qcZ7R8d4NpnN4+Kdqq9Ex8tvUEvkg7iczcIhSVuk95TFXD7V+rUqpQKdUDwCkX1S4DiBWRUADRADwbcCQKAGGhIXjnnmsBAA0c9lGtDp9P6udV/WfGanPLF5S6TjncMDZSc55vzkHv7nkAsOPEJevxQwvTMP2rPZj6v93o+4/V6PT3H/DNrjOeNpt8yFfdkMUAbgRwFMB+pdRRN/WJAsqwjo2wcGIfbH16ZLW/d9+2DdCyvvsbm52b1kXG7LGaufWAbbOQdol1dM9RSmHr8Yu6cgAoKnW/P2tOYSnKKxRecDLO/9iin63HFRUK+87kuH1Nunq+CuzTAbytlEoCUF9EBjhWEJFJIpImImlZWd7vDE/kTyKCIR0SdWkGqss3jw7CrAndXdZZ9LCtZ58xeywWmXv6Oea0wkaBus30752+XqEHu0e9u/44lu45iw+cTKG0n27//sbjGPv6Rt1CKld2n7qMMmdzRMkpXwX2OABF5uNiALGOFZRS85VSKUo9OKZ3AAAOo0lEQVSplMTERB+9LVHtkFAnAnfYZZ509MSoDoiP0U6XtCyeeuDDbci4kI/Tlwvxm94tNJt+uFJUpg3sSilc/8+1+Cg1A83NUyO7NKuLKZ/9bPBsvb3m3vqpS57lslm9PxM3v7EJyU8vw/EL+R49h0y8Duwi0kZE5joUvwlgsoikwjTGvtoXjSMirTm/6YGHB+u38TPqjduvih06dx0A025PIx12jHImz+HmaXFZBX7JLsDfl+zDaXOisfc3ul7s1Cw+ynrsmOm4okLhvQ3H8NCCbYbTSB9ckGY93nDY+a/8jAv5ePG7dJezeGobjwO7UirZ/N/jSqknHB7LUEoNVEr1V0r9TinF2+FEVeC2lJYY37OZrtxovrtRNsnV+897/F7PfL0XALAt4yI2H8vG8n3nvGipiX2s3XTkAgDAnI0BL3yXjpeW7seq/efxyopDBs+2ex0XQXvo3HV4f+NxHDh3xWmd2oYLlIgCTEKMfmbO7QbDNM0NVpIeczKkseav1+O/v++vKTth3rrvt++k4vb5mzFv9WGv21pu103PNm8raCn68KcM62M7T16CK6XlxoHdfojmp6MXzHUrNKuFXfnvtpMY+/oGw71sn168B6nmOf6BhoGdKMAkxkXqyhynLAK2RGVG3rzzWs1520TdbTGdY1mejXPb59KxBEz7Fa1//nwnHrIbZgGAHScuu3xNS976JTtPIzO3yFqekW1r00tLTXWufWElej6/wqOhmSe/3I19Z3JR6DDn/sdDWfhkywnc8e5mt69REzGwEwUY+xwyHz/YFxMH6sfcLf4yUrty1bL5h+OUSACoMAdhS1KzyvjVNc3QrF60dbjlQl4J
8orL0OGZZZp6q/ZnunydSwabhqefycVji3Zac86/uvIQHvjPNk2dczlF1pW2BV4skCp2uEfxeiV+nVhkXMiHUgrXvLACs5cdqPTrXA0GdqIAY+mJ39m3FQa1b4hnx3dxWvexke015/f2T9LVWfwH0+zkXq3q4abuTdxOq3R0eOYY63FspCmvoP3Ixhtrjnidg+avX+zSlZ222xkq9Wi24dDQSrsvjGIvAnu6w2bilZ3VmjRtKYbOXYcHPtyGywWleOdH/yzpYWAnCkAZs8fiH7d6F4ABoHFd0yyVXq3qISYiFHN+0wO9WiUAMOWCf+uu3miXGIueLeuhd+sEjwKyfbpi+5k4FifNY/XuJE1bio2HTePkaw7ob/Jm5xVbj50tdNp32la+It31r4Kf7VbN7jypHQpysVe5U/bXat1B/67VYWAnqiXuH5CEjua0wQ1jI5H+wo24LcU4T01ZeQW2/3IJT/5vt6a8ZwvbdoH/uLU71j0xFIDp1wNg23XqP/dfZ623dM9Zp21y3Mjk7ve3OK172G7P2ZVOgvaibbYUxtO/2uNynN3+9RrGam9IV2biZGkNWkjFwE4U5PbMGIWvHx2IGTd39fg5+86Yhia+3KFNEWU/pfCOPi2R1NCUpuCJUR0xpEOidbx/WKdG1mEZV4Z08Hyx4l673vgWJ2kQHE3+ZLvTXx3RdtNBt/9i671/uf2UdUYQYEtvbFFYUm74mo7j9BYlZdUf8BnYiYJcXFQ4rmlZzyevVVxWgS8n98cDA5M0s27q14nAwol9NDN2xvVo6vb1HMeyLT1/i5u6N7GmLnYVzP/zwHWG5cv3ZWLGN/rdpADttFH7VBF//WIXsq7Yhn3sF3/tO5ODzs/+gL99afslczG/BAfPXUHKzJWG71NcVv3LehjYicgrvVvXx3Pj3ff+X7ylm+bcaMVsvZgIjOjUyHq+6+RlzZzyt+7qjTHdmrh9ryHtnff8F/982m35Z1tPatIX27NPPzz29Y0AgP9tP2Vt5y1vbsLo19Y7nWvPHjsR1QiOK1k7NHY/z92R4x6w7RvFYf49vXX15v62p/V435lcfLLlhObxOh4M6TgmZ1trHvsHTLteGS1AchxmSj+Tq6sDQDfH3eKZr/eiokJZh23u6mucy8dZwK9KDOxEpNO9ebzmfHgnz/LLuFJcVo7+7RpABHhufBd8+8dBAEwJzuxZUhlYeBLYHSU4JEQbOHsNZi5NdznLx9lm5P9acdCw/JMtJzQzbyxfSJ88pN2UnD12IqoR/n1PiiYHfPN6UZg6uiM2/m1YpV9zUPtExEWF4/issXhgYBt0t5ths+rxIbr6c35j2jIwzkVgrxMRiiPmefRTR3e0ltdzSLtwJqcI7244jnZPfe90bnl+cRn2nNJPo1y+LxNdn/3BcIbNIx9v15UNaNcAr9zW0zodtcQPs2UY2IlIp36dCHw52batwl19W+PRYclokeDZpt0W654YigcGJiFj9li0aajf6MMiuVGc7kZqs3jTF0tIiGDlX4bg6Zs648vJ2nw2+SXl1imWjw5Lxsa/DbP+EnBm9rIDOJtjW+y0+q/XAzBtGrLSyYrY/JJyzPjW+CasIxHBhGtboH4d06+Gg35ITsbATkSG6kbZhjNCKrkUM6lhHY9utALaTJCAdpy/feM4PDykLXq3rq/Ji/Mvu/F5AGiREKP5JeBM/1lrrIupWtc3fVm98F26JpXAF49ov0QWpv7i0eewuGze4OTRT3d49TxfYGAnIkNR4aHo3LQuZt7azX3lKuBst6o37uxlPW5Z3/kviIzZY12+fkFJORrFRVp7/PY+erAPGsdFGTzLJKV1gsvXBoDcIs8yTFYFBnYicmrZY4NxV9/WfnlvZ7c529rt3WqUwsDe148OdPm4s+3/rkuqj7BQ4y+W52/uigUT+xg+tshu4/Hh5mmcncyrfasTAzsR1QiWsW6LLk3rGtazHyJyx93CrCsGOzcBpgyazrIej+vR1OkXSlyU
7UZvcqM4dG1W15oXXymFH/ae9TohWmUwsBNRjdAuMRZ/HJZsPTfaAcqx3Cg3vSOjGTeOFjr0wEUEibGRaBqvH46pXycCIoJW5mGgH/482PqY49BQXFQYrpi3GFyy8wwe+XgHFthtMFJVvJ8gSkRURZyNqzuTaLDBiKPkRrahkLYN62h2kXrnbtOGI12a6X8dhIWGIHX6CKw5kImJH9o2BrGkUlj/pGnqZ0GJrdfv+GsiNjIMq/afx6dbTljrnbzkWbbLq8HATkQ1RoiLXZ8M63v4RTAouSHyist06Xlv7GbKZ+Ps1wEADO1gS3nQr61+E5KoMOfPXWXeY/apxXswyryJuNHG477GwE5ENcZvU1rg820n8OnD/VzW++KR/ppsj+58bF4N2uGZZYYrQaPsdpRyDN72Xx5zfq2dXun4uCuWVarn7bb2qyoM7ERUYzSrF42fpo9wW++6pPq4Lsn7Lfz2PT8ae07nYMJbP2nKw0JD8MUj/ZGcGIt6Mc5vzrZq4N0CrbAQQZnDzdKnxnb26jUqg4GdiGqN8NAQXGveMeqhQdpsk5X5orDYPWMUjPrtfxreHq+uOqQp8+SG79XyKLCLSDiAr5RS413UeRLABACXAPxKKaXfjZaIqAZwt3jJ0cSBbVBYajw1EnA+BXPKiGQ8Oqwdkp+2beYd7WI831fcBnYRiQawBUAHF3XaAuiqlOonIlMAtABwzGetJCLyI1cbhrsiIrqFTs6ySPqS23dQShUqpXoAOOWi2ggACSKyHsBgAMd91D4iIvKSr746EgFkKaWGwNRb16VXE5FJIpImImlZWf7dwZuIyB+mDE92X8kHfBXYcwFYstEfA9DcsYJSar5SKkUplZKY6PkGtkREgW5kZ9Nc+OiI6pmv4qvAvh1Aivk4GRxfJyKyijDPk69k9mOveR3YRaSNiMy1L1NKpQLIFpFtAA4qpbb6qoFERIHu+z3nAABvrD1SLe/n8e8CpVSy+b/HATxh8PhkH7aLiCjotE30flPwymB2RyKiKtbHvPhp5i3Vs2kJV54SEVWxeXdcgw9/ynCaY97XGNiJiKpY0/hoTB9T9TliLDgUQ0QUZBjYiYiCDAM7EVGQYWAnIgoyDOxEREGGgZ2IKMgwsBMRBRkGdiKiICNKKfe1fP2mIlkAfqnk0xsCuODD5gQiXgMTXgdeA6B2XYPWSim3ec/9EtivhoikKaVS3NcMXrwGJrwOvAYAr4ERDsUQEQUZBnYioiATiIF9vr8bUAPwGpjwOvAaALwGOgE3xk5ERK4FYo+diIhcCJjALiJRIvKdiOwSkY9EpJq2ha1+IrJARDaLyDciEuv4uY2uRTBeHxF5XERWiUhDEdkgIntEZLb5MY/KAp2IPGn+W1gmIo1q23UQkToiskRENonInNr8t+CNgAnsAO4GcEop1RNAAoAb/NyeKiEigwCEKaX6AagLYCL0n9voWgTV9RGR1gDuM5/+GcBSAD0BjBGRDl6UBSwRaQugq/lvYRmA11D7rsNdADYrpQYC6Arg36h918BrgRTYhwNYaT5eA2CYH9tSlTIBzDMfhwCYAf3nNroWwXZ95gGYbj4eDmClUqoCwI+w+7welAWyEQASRGQ9gMEA2qD2XYfLAGJFJBRANIABqH3XwGuBFNgbAMgxH+cCqO/HtlQZpdRhpdRWEbkVQAWAn6H/3EbXImiuj4jcCWAXgHRzkaefN2iugVkigCyl1BAALQD0Qe27DosB3AjgKID9MH2e2nYNvBZIgf0CgHjzcTyCeAmxiNwMYAqA8QDOQf+5ja5FMF2fcTD1VhcB6A3TkvHadg0AU0A6aD4+BiADte86TAfwtlIqCabg3AG17xp4LZAC+2oAo8zHwwGs9WNbqoyINAEwFcA4pdQVGH9uT8sCklLqTqXUIAC3A9gO4E0Ao0QkBMD1sPu8HpQFsu0ALEvlk2EK8rXtOsQBKDIfFwNIRe27Bl4LpMD+CYDmIrIbwEWY/o8LRvcBaApg
uYhsBBAO/ec2uhbBfH1eB3ATgN0AliqljnhRFrCUUqkAskVkG0xB/V7UvuvwJoDJIpIK0xj7rah918BrXKBERBRkAqnHTkREHmBgJyIKMgzsRERBhoGdiCjIMLATEQUZBnYioiDDwE5EFGT+H2+iJL5923PeAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "learn.sched.plot_loss()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 588,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learn.save('clas_2')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true,
    "hidden": true
   },
   "source": [
    "## Course4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 532,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "em_sz = 200  # size of each embedding vector\n",
    "nh = 500     # number of hidden activations per layer\n",
    "nl = 3       # number of layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 533,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "opt_fn = partial(optim.Adam, betas=(0.7, 0.99))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 535,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "trn_dl = LanguageModelLoader(np.concatenate(trn_lm), bs, bptt)\n",
    "val_dl = LanguageModelLoader(np.concatenate(val_lm), bs, bptt)\n",
    "\n",
    "md = LanguageModelData(PATH, 1, vs, trn_dl, val_dl, bs=bs, bptt=bptt)\n",
    "\n",
    "learner = md.get_model(opt_fn, em_sz, nh, nl,\n",
    "               dropouti=0.05, dropout=0.05, wdrop=0.1, dropoute=0.02, dropouth=0.05)\n",
    "learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)\n",
    "learner.clip=0.3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 536,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bcf8c9a836b442239a6b59745bdef7d5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch      trn_loss   val_loss                                \n",
      "    0      3.963112   3.886686  \n",
      "    1      3.642617   3.551546                                \n",
      "    2      3.509402   3.470327                                \n",
      "    3      3.538203   3.464301                                \n",
      "    4      3.446512   3.397923                                \n",
      "    5      3.356637   3.362462                                \n",
      "    6      3.326469   3.351762                                \n",
      "    7      3.459886   3.406591                                \n",
      "    8      3.41302    3.384714                                \n",
      "    9      3.375454   3.364341                                \n",
      "    10     3.338818   3.345472                                \n",
      "    11     3.297835   3.330314                                \n",
      "    12     3.268265   3.31928                                 \n",
      "    13     3.229251   3.314139                                \n",
      "    14     3.214018   3.313166                                \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([3.31317])]"
      ]
     },
     "execution_count": 536,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "learner.fit(3e-3, 4, wds=1e-6, cycle_len=1, cycle_mult=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 537,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner.save_encoder('adam1_enc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 538,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "learner.load_encoder('adam1_enc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 539,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c1f1b1d43311436880993ab6549bcd3d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch      trn_loss   val_loss                                \n",
      "    0      3.370867   3.437287  \n",
      "    1      3.358299   3.38325                                 \n",
      "    2      3.315635   3.35539                                 \n",
      "    3      3.293123   3.344098                                \n",
      "    4      3.27162    3.332447                                \n",
      "    5      3.232894   3.32231                                 \n",
      "    6      3.199915   3.313746                                \n",
      "    7      3.169163   3.31064                                 \n",
      "    8      3.14949    3.305823                                \n",
      "    9      3.128014   3.308863                                \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([3.30886])]"
      ]
     },
     "execution_count": 539,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One further cycle of 10 epochs at lr=3e-3, wds=1e-6.\n",
    "# In the recorded log, val_loss is lowest at epoch 8 (3.305823) and slightly higher at epoch 9 (3.308863).\n",
    "learner.fit(3e-3, 1, wds=1e-6, cycle_len=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 540,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Checkpoint the encoder of `learner` after the 10-epoch cycle under the name 'adam3_10_enc'.\n",
    "learner.save_encoder('adam3_10_enc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 541,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Reload the 'adam3_10_enc' encoder checkpoint written by the previous cell.\n",
    "learner.load_encoder('adam3_10_enc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 542,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 543,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# part 1\n",
    "# Dropout rates consumed by the get_rnn_classifier call in the next cell:\n",
    "#   dps[0] -> dropouti, dps[1] -> wdrop, dps[2] -> dropoute, dps[3] -> dropouth,\n",
    "#   dps[4] -> first entry of drops.\n",
    "dps = np.array([0.4, 0.5, 0.05, 0.3, 0.1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 546,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Build the RNN classifier; every dropout rate except the final 0.1 in `drops` comes from dps.\n",
    "# 20*70 is the second positional argument (presumably the maximum sequence length -- TODO confirm).\n",
    "m = get_rnn_classifier(\n",
    "    bptt, 20*70, c, vs,\n",
    "    emb_sz=em_sz,\n",
    "    n_hid=nh,\n",
    "    n_layers=nl,\n",
    "    pad_token=1,\n",
    "    layers=[em_sz*3, 50, c],\n",
    "    drops=[dps[4], 0.1],\n",
    "    dropouti=dps[0],\n",
    "    wdrop=dps[1],\n",
    "    dropoute=dps[2],\n",
    "    dropouth=dps[3],\n",
    ")\n",
    "m.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)\n",
    "# NOTE(review): loading the 'adam3_10_enc' encoder is disabled below and no other visible cell\n",
    "# loads it into this model -- confirm the classifier is meant to start without it.\n",
    "# m.load_encoder(f'adam3_10_enc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 547,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Wrap the classifier `m` (passed through to_gpu and TextModel) in an RNN_Learner over `md`,\n",
    "# then configure metrics, clip value and the regularisation function.\n",
    "learn = RNN_Learner(md, TextModel(to_gpu(m)), opt_fn=opt_fn)\n",
    "learn.metrics = [accuracy]\n",
    "learn.clip = 0.25\n",
    "learn.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Five learning rates, smallest first, passed to learn.fit in the following cells\n",
    "# (presumably one per layer group of the classifier -- TODO confirm).\n",
    "lrs=np.array([1e-4,1e-4,1e-4,1e-3,1e-2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Stage 1: freeze_to(-1) (presumably leaves only the last layer group trainable -- TODO confirm),\n",
    "# then fit once at half the learning rates.\n",
    "learn.freeze_to(-1)\n",
    "learn.fit(lrs/2, 1, metrics=[accuracy])\n",
    "# Stage 2: unfreeze and fit one cycle (cycle_len=1) at the full learning rates.\n",
    "learn.unfreeze()\n",
    "learn.fit(lrs, 1, metrics=[accuracy], cycle_len=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Main training run on `learn`, the classifier learner created above: 7 cycles of 2 epochs each.\n",
    "# cycle_save_name stores the weights of each cycle under the 'call_reason2' prefix.\n",
    "learn.fit(lrs, 7, metrics=[accuracy], cycle_len=2, cycle_save_name='call_reason2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Restore the weights saved for cycle 4 of the 'call_reason2' run above.\n",
    "learn.load_cycle('call_reason2', 4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Accuracy of the restored classifier, computed from its predictions and targets.\n",
    "accuracy_np(*learn.predict_with_targs())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
